Related
Just wondering how I can create a custom colour scheme based on conditions for a holoviews heatmap. I have created a column for colours that are based on conditions within the data. However, when I plot these the standard cmap appears but my colour scheme appears on the cells when I hover over them. Does anyone know how I can ignore the standard color map that is displaying or implement it so my conditional one appears instead. Example code below:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from datetime import datetime
import holoviews as hv
from holoviews import opts
import panel as pn
from bokeh.resources import INLINE
from holoviews import dim
# Load the plotting backends. The original also called gv.extension('bokeh'),
# but geoviews is never imported in this script, so that line raised NameError.
hv.extension('bokeh', 'matplotlib')
pd.options.plotting.backend = 'holoviews'

# Traffic-light palette for the conditional colouring.
green = '#00FF00'
amber = '#FFFF00'
red = '#FF0000'


def _value_to_color(value):
    """Return the traffic-light colour that belongs to one ``Value`` entry."""
    if value >= 0.9:
        return green
    if value >= 0.7:
        return amber
    if value < 0.7:
        return red
    # Reached only when every comparison is false (e.g. a NaN value).
    return '#8A2BE2'


Data = [['A', 'Foo', 0.2], ['B', 'Bar', 0.9], ['C', 'Cat', 0.7]]
df = pd.DataFrame(Data, columns=['Name', 'Category', 'Value'])
df['colors'] = df['Value'].map(_value_to_color)
# NOTE(review): `color=hv.dim('colors')` is kept on purpose; it is the option
# whose behaviour the surrounding question is asking about.
df_hm = hv.HeatMap(
    df, kdims=['Category', 'Name'], vdims=['Value', 'colors']
).opts(width=900, height=400, color=hv.dim('colors'), tools=['hover'])
When this code is run, I get the following, which is the standard cmap:
enter image description here
However, when I hover over a cell, the color changes to the scheme I want; unfortunately, I can't add a picture to show it. Does anyone know how I can make it show only the conditional colouring that I am after?
I've added a picture of what is happening. When I hover over the cell you can see the conditional coloring; however, the cmap color is overlaid on top of it, which I want to remove.
Current behavior
Thanks a bunch for any help!
You are using the wrong keyword in your opts() call. You have to use cmap instead of color.
Here is a very basic example, adapted from here.
import holoviews as hv
from holoviews import opts
# Activate the bokeh backend for everything plotted below.
hv.extension('bokeh')

# Categorical scatter/spikes pair; the heat map further down does not use it.
factors = ["a", "b", "c", "d", "e", "f", "g", "h"]
x = [50, 40, 65, 10, 25, 37, 80, 60]
scatter = hv.Scatter((factors, x))
spikes = hv.Spikes(scatter)

# 3x3 grid of (x, y) cells with one value each; `x` is rebound here.
_labels = ["foo", "bar", "baz"]
x = [label for label in _labels for _ in _labels]
y = _labels * len(_labels)
z = list(range(9))

# One colour per entry, handed to the heat map as its colour map.
colors = [
    '#00FF00', '#FFFF00', '#FF0000',
    '#FFFF00', '#FF0000', '#00FF00',
    '#FF0000', '#00FF00', '#FFFF00',
]
hv.HeatMap((x, y, z)).opts(width=450, height=400, cmap=colors, tools=['hover'])
Output
I have the following bar graph generated using pandas. My problem is that all the bars have the same pattern. I have tried many approaches but could not get around this issue.
Moreover, only one entry(for the last subplot) is shown in the legend.
The data used is
The code is :
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter
class ScalarFormatterForceFormat(ScalarFormatter):
    """ScalarFormatter that always uses the fixed format string ``%1.1f``."""

    def _set_format(self):
        # Override the hook in which the base class chooses its format string
        # and force one decimal place instead.
        self.format = "%1.1f"
# Slot geometry from the original script; the plotting below does not read it.
bar_gap = 0.005
bar_width = 0.01
bar_pos = [0 for i in range(5)]
bar_pos[0] = bar_gap
for i in range(1, 5):
    bar_pos[i] = bar_pos[i - 1] + bar_gap + bar_width

colors = ['tab:blue', 'tab:green', 'tab:orange', 'tab:red', 'tab:olive']
# The script defined `patterns` twice; only this later definition was ever used.
patterns = ["\\", "/", "+", "-", ".", "*", "x", "o", "O"]

file_locn = r'my_file.csv'
df = pd.read_csv(file_locn, index_col='Set')
df = df.T

fig, axes = plt.subplots(1, 3, figsize=(8, 5))  # ,sharey=True)
for ax in axes:
    # One formatter per axis: a formatter keeps a reference to the axis it is
    # attached to, so a shared instance only tracks the last axis.
    yfmt = ScalarFormatterForceFormat()
    yfmt.set_powerlimits((0, 0))
    ax.yaxis.set_major_formatter(yfmt)

df.Type_A.plot(ax=axes[0], kind='bar', color=colors)
df.Type_B.plot(ax=axes[1], kind='bar', color=colors)
df.Type_C.plot(ax=axes[2], kind='bar', color=colors)
handles, labels = axes[0].get_legend_handles_labels()

for ax in fig.axes:
    bars = ax.patches
    # NOTE(review): `hatches` is ONE long string, so zip() hands out single
    # characters; with h*len(df) the first len(df) bars all get the same
    # character. This is the behaviour the question asks about, left as posted.
    hatches = ''.join(h * len(df) for h in patterns)
    for bar, hatch in zip(bars, hatches):
        bar.set_hatch(2 * hatch)
plt.xticks(rotation=360)
axes[0].set_ylabel('Speed')

# Iterate over the axes themselves: indexing `axes[i]` with range(len(df))
# overruns the three subplots whenever df has more than three rows.
for ax in axes:
    ax.set_xlabel('')
    ax.tick_params(axis='x', rotation=360)

# plt.legend() acts on the current axes only (left as posted).
plt.legend(loc='center right', bbox_to_anchor=(.2, 1.08), ncol=1)
plt.show()
The code below has the following changes:
added some dummy test data to enable stand-alone test code
removed some unused variables
used the unaltered ScalarFormatter
only one loop through the axes and avoiding the plt interface
using ax.containers[0] to catch the bar container (ax.patches is a list of the rectangles, without the surrounding container)
change the label of the bar container to _no_legend, so it doesn't appear in the legend
used the patterns directly instead of concatenating them
removed h*len(df); note that multiplying a string such as '/' by e.g. 4, repeats the string (to '////'); repeated patterns are used in matplotlib to make the base pattern denser
used tick_params(axis='x', labelbottom=False, length=0) to remove the tick labels
added labels to the individual bars so they appear into the legend
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter
colors = ['tab:blue', 'tab:green', 'tab:orange', 'tab:red', 'tab:olive']
patterns = ["\\", "/", "+", "-", ".", "*", "x", "o", "O"]

# Dummy data so the example runs stand-alone: rows A/B/C, one column per type.
df = pd.DataFrame(np.random.randint(100000, 500000, (3, 3)),
                  columns=['A', 'B', 'C'],
                  index=['Type_A', 'Type_B', 'Type_C'])
df = df.T

fig, axes = plt.subplots(1, 3, figsize=(8, 5))
df.Type_A.plot(ax=axes[0], kind='bar', color=colors)
df.Type_B.plot(ax=axes[1], kind='bar', color=colors)
df.Type_C.plot(ax=axes[2], kind='bar', color=colors)

# Repeating a hatch character makes the pattern denser. The list does not
# depend on the axis, so it is built once outside the loop.
hatches = [h * 2 for h in patterns]

for ax in axes:
    # ax.containers[0] is the bar container; relabelling it with a leading
    # underscore keeps the container itself out of the legend.
    bars = ax.containers[0]
    bars.set_label('_no_legend')
    for bar, hatch, label in zip(bars, hatches, df.index):
        bar.set_hatch(2 * hatch)
        bar.set_label(label)  # individual bars show up in the legend
    # One formatter per axis: a formatter keeps a reference to the axis it is
    # attached to, so a shared instance only tracks the last axis.
    yfmt = ScalarFormatter()
    yfmt.set_powerlimits((-9, 9))
    ax.yaxis.set_major_formatter(yfmt)
    # Hide the x tick labels and tick marks.
    ax.tick_params(axis='x', labelbottom=False, length=0)

axes[0].set_ylabel('Speed')
axes[2].legend(loc='lower right', bbox_to_anchor=(1, 1.01), ncol=3)
plt.tight_layout()
plt.show()
The lines where you are joining the patterns generates a result, which you don't want.
patterns = ["\\", "/", "+", "-", ".", "*", "x", "o", "O"]
# Joining the tripled patterns yields ONE long string, not a list of hatches.
hatches = ''.join(h * 3 for h in patterns)
# hatches == '\\\\\\///+++---...***xxxoooOOO'

# if you have the bars, this is the output
# (zip() walks the string character by character, so every "bar" gets '\\')
for bar, hatch in zip([0, 1, 3], hatches):
    print(2 * hatch)
>>>
\\
\\
\\
Try to simplify this section using the patterns in your loop directly:
# Pair each bar with a whole pattern from the list instead of a single
# character of the joined string.
for bar, hatch in zip([0, 1, 3], patterns):
    print(2 * hatch)
>>>
\\
//
++
Output
I used your given code and data to create this output.
I am attempting to create an animated plot that updates in real time with the data from my serial port. Data is streamed in by an Arduino in an 8x8 array. The data are temperatures from an IR camera. I am able to create an instance of a figure but I cannot get the text to update with the serial stream data.
I tried to set 'plt.show(block=False)' so that the script would continue, but this makes the figure empty completely and scales it into a small window with a loading cursor that just continues to load.
I only want the text to update with the array data, as well as the colors from the new normalized data.
How can I get the text to update with the serial data in matplotlib?
Thanks!
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import serial
import time
# Serial connection that delivers the 8x8 temperature frames.
tempdata = serial.Serial("COM3", 9600)
tempdata.timeout = 100
strn = []
rows = ["A", "B", "C", "D",
        "E", "F", "G", "H"]
columns = ["1", "2", "3", "4",
           "5", "6", "7", "8"]
print("AMG8833 8x8 Infrared Camera")
time.sleep(0.75)
print("Connected to: " + tempdata.portstr)
time.sleep(0.75)
print("Initializing Camera...")
tempsArray = np.empty((8, 8))

while True:  # Makes a continuous loop to read values from Arduino
    # NOTE(review): a new figure is created on every pass and plt.show() at
    # the end blocks by default until the window is closed. That is the
    # behaviour the question is about, so the structure is left as posted.
    fig, ax = plt.subplots()
    im = ax.imshow(tempsArray, cmap='plasma')
    tempdata.flush()
    # The terminator must be bytes: the port yields bytes, so a str ']' never
    # matches on Python 3 and the read only ends at the timeout.
    strn = tempdata.read_until(b']')  # reads the value from the serial port
    tempsString = np.asarray(strn)
    tempsFloat = np.fromstring(tempsString, dtype=float, sep=', ')
    # Axes ticks
    ax.set_xticks(np.arange(len(columns)))
    ax.set_yticks(np.arange(len(rows)))
    # Axes labels
    ax.set_xticklabels(columns)
    ax.set_yticklabels(rows)
    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")
    tempsArray.flat = tempsFloat
    im.set_array(tempsArray)
    ax.set_title("")
    fig.tight_layout()
    # Loop over data dimensions and create text annotations.
    for i in range(len(rows)):
        for j in range(len(columns)):
            text = ax.text(j, i, tempsArray[i, j],
                           ha="center", va="center", color="w")
    plt.show()
Heat Map
This dynamic updating can be achieved with matplotlib's interactive mode. The answer to your question is very similar to this one: basically you need to enable interactive mode with ion() and then update the plot without calling the show() (or correlated) function.
Also, the plot and subplots are to be created only once, before the input loop.
This is the modified example:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import serial
import time
# Serial connection that delivers the 8x8 temperature frames.
tempdata = serial.Serial("COM3", 9600)
tempdata.timeout = 100
rows = ["A", "B", "C", "D",
        "E", "F", "G", "H"]
columns = ["1", "2", "3", "4",
           "5", "6", "7", "8"]
print("AMG8833 8x8 Infrared Camera")
time.sleep(0.75)
print("Connected to: " + tempdata.portstr)
time.sleep(0.75)
print("Initializing Camera...")
tempsArray = np.zeros((8, 8))

# Interactive mode: the figure is built once and only refreshed in the loop.
plt.ion()
fig, ax = plt.subplots()
# The colour limits are rescaled on every frame (im.autoscale() below), so
# the image can start from the all-zero placeholder.
im = ax.imshow(tempsArray, cmap='plasma')
# Axes ticks
ax.set_xticks(np.arange(len(columns)))
ax.set_yticks(np.arange(len(rows)))
# Axes labels
ax.set_xticklabels(columns)
ax.set_yticklabels(rows)
# Rotate the tick labels and set their alignment.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
         rotation_mode="anchor")
ax.set_title("")
fig.tight_layout()

# One text artist per cell, created once in row-major order and updated in
# place afterwards instead of being removed and re-created for every frame.
text = [ax.text(j, i, "", ha="center", va="center", color="w")
        for i in range(len(rows))
        for j in range(len(columns))]

try:
    while True:  # Makes a continuous loop to read values from Arduino
        tempdata.flush()
        # The terminator must be bytes: the port yields bytes, so a str ']'
        # never matches on Python 3 and the read only ends at the timeout.
        strn = tempdata.read_until(b']')
        # NOTE(review): parsing assumes the frame is a comma-separated list of
        # floats; confirm against what the Arduino sketch actually sends.
        tempsString = np.asarray(strn)
        tempsFloat = np.fromstring(tempsString, dtype=float, sep=', ')
        tempsArray.flat = tempsFloat
        im.set_array(tempsArray)
        # Re-fit the colour limits to the new frame.
        im.autoscale()
        # tempsArray.flat iterates row-major, matching the order of `text`.
        for ann, value in zip(text, tempsArray.flat):
            ann.set_text(value)
        # allow some delay to render the image
        plt.pause(0.1)
finally:
    # Runs when the endless loop is interrupted (e.g. Ctrl-C); the original
    # trailing plt.ioff() was unreachable and the port was never closed.
    plt.ioff()
    tempdata.close()
Note: this code worked for me, but since I don't have an Arduino right now I tested it with a randomly generated sequence of frames (np.random.rand(8,8,10)), so I might have overlooked some detail. Let me know how it works.
I have two dataframes which I need to get the difference and then plot one of them on top of this difference. Here is a minimal example:
import pandas as pd
import matplotlib.pyplot as plt
# Two tables with the same layout: key column "N" plus series A-D.
_columns = ["N", "A", "B", "C", "D"]
_series = ["A", "B", "C", "D"]
df1 = pd.DataFrame(
    [[2, 5, 7, 6, 7], [4, 4, 4, 4, 3], [8, 8, 7, 3, 4], [16, 10, 12, 13, 16]],
    columns=_columns,
)
df2 = pd.DataFrame(
    [[2, 1, 3, 6, 5], [4, 1, 2, 3, 2], [8, 2, 2, 3, 3], [16, 8, 10, 3, 11]],
    columns=_columns,
)

# Element-wise difference; subtracting also zeroes "N", so restore it from df1.
dfDiff = (df1 - df2).assign(N=df1['N'])

# Individual barchart
colors = ['#6c8ebf', '#82b366', '#F7A01D', '#9876a7']
for _frame in (df1, df2):
    _frame.set_index('N')[_series].plot.bar(color=colors)

# Combined table; the column order below decides the stacking order.
dfStacked = pd.DataFrame({
    "N": df2["N"],
    "A": df2["A"],
    "A_diff": dfDiff["A"],
    "B": df2["B"],
    "B_diff": dfDiff["B"],
    "C": df2["C"],
    "D": df2["D"],
    "C_diff": dfDiff["C"],
    "D_diff": dfDiff["D"],
})
dfStacked.set_index('N').plot.bar(stacked=True)
plt.show()
The dataframes look like this:
The thing is that now the new stacked one ends up with everything merged. I want to have "A" stacked with "A_diff", "B", stacked with "B_diff", "C" stacked with "C_diff" and "D" stacked with "D_diff".
For example, I changed the code to do it with "A" and "A_diff" as dfStacked.set_index('N')[["A", "A_diff"]].plot.bar(stacked=True) which looks correct, but I want A,B,C and D grouped by N like in the first two figures.
Do I need a new dataframe for this, like dfStacked? If so, in which form should the content be added? And how can I keep the same colors but add hatch="/" only for the "top" stacked bar?
Would it be better to have the dataframe as below?:
# Long-format alternative: one row per bar, appended one at a time.
_rows = [
    [2, "A", 20, 10],
    [2, "A", 1, 4],
    [4, "A", 2, 3],
    [4, "A", 3, 4],
    [2, "B", 1, 3],
    [2, "B", 2, 4],
    [4, "B", 3, 3],
    [4, "B", 4, 2],
]
df3 = pd.DataFrame(columns=["N", "Algorithm", "df1", "dfDiff"])
for _position, _row in enumerate(_rows):
    # _position equals len(df3) at this point, so each row is appended.
    df3.loc[_position] = _row
But how to group them by "N" and "Algorithm"? I mean, each row corresponds to one bar, just they should be grouped by "N" with all the "Algorithms" and the two last columns are the two "parts" of each bar. It would be good that the colors match the first two figures (for the "Algorithms") but the top part of the bar has hatch="/" for example.
Thanks for the help
I'll start from df1, df2 and get dfStacked in a slightly different way:
import pandas as pd
# Both tables share the same layout and are indexed by "N".
_COLUMNS = ["N", "A", "B", "C", "D"]
_rows_first = [
    [2, 5, 7, 6, 7],
    [4, 4, 4, 4, 3],
    [8, 8, 7, 3, 4],
    [16, 10, 12, 13, 16],
]
_rows_second = [
    [2, 1, 3, 6, 5],
    [4, 1, 2, 3, 2],
    [8, 2, 2, 3, 3],
    [16, 8, 10, 3, 11],
]
df1 = pd.DataFrame(_rows_first, columns=_COLUMNS).set_index('N')
df2 = pd.DataFrame(_rows_second, columns=_COLUMNS).set_index('N')

# Put the raw values and the differences side by side under the outer keys
# 'raw' and 'diff', then swap the two column levels so the series name
# (A-D) is the outer level.
_side_by_side = pd.concat({'raw': df1, 'diff': df1 - df2}, axis=1)
dfStacked = _side_by_side.reorder_levels([1, 0], axis=1)
Now we have this DataFrame:
To draw this data in a bar chart stacked by the first level we could make use of two DataFrame.plot's features - ax and bottom. The first one is the location of the axes where the barplot should be drawn, the second one is for the values where the bottom line of the bars should start. For details run help(plt.bar) to read about bottom and help(pd.DataFrame.plot) to read about ax.
import matplotlib.pyplot as plt
from matplotlib.colors import TABLEAU_COLORS
plt.figure(figsize=(10, 7))
ax = plt.gca()

# Outer column level of dfStacked: one entry per series name.
names = dfStacked.columns.levels[0]
n = len(names)
color = iter(TABLEAU_COLORS)
w = 1 / (n + 2)  # width
h = '/' * 5      # hatch for diff values

for i, name in enumerate(names):
    c = next(color)  # color
    p = n / 2 - i    # position
    # Lower part of the bar: the raw values.
    dfStacked[name]['raw'].plot.bar(
        ax=ax,
        position=p,
        width=w,
        color=c,
        label=f'{name} raw'
    )
    # Upper part: the diff values, hatched and starting where the raw bar
    # ends (bottom=raw), in the same slot and colour.
    dfStacked[name]['diff'].plot.bar(
        ax=ax,
        bottom=dfStacked[name]['raw'],
        hatch=h,
        position=p,
        width=w,
        color=c,
        label=f'{name} diff'
    )

ax.set_xlim([-1, n])
ax.tick_params(axis='x', rotation=0)
ax.legend();
And here's the output:
IIUC, try using position parameter in pd.DataFrame.plot.bar:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Source tables: key column "N" plus series A-D.
_COLS = ["N", "A", "B", "C", "D"]
df1 = pd.DataFrame(
    [[2, 5, 7, 6, 7], [4, 4, 4, 4, 3], [8, 8, 7, 3, 4], [16, 10, 12, 13, 16]],
    columns=_COLS,
)
df2 = pd.DataFrame(
    [[2, 1, 3, 6, 5], [4, 1, 2, 3, 2], [8, 2, 2, 3, 3], [16, 8, 10, 3, 11]],
    columns=_COLS,
)

# Element-wise difference; subtracting also zeroes "N", so restore it from df1.
dfDiff = (df1 - df2).assign(N=df1['N'])

# Each series next to its "_diff" companion, indexed by "N".
dfStacked = pd.DataFrame({
    "N": df2["N"],
    "A": df2["A"],
    "A_diff": dfDiff["A"],
    "B": df2["B"],
    "B_diff": dfDiff["B"],
    "C": df2["C"],
    "D": df2["D"],
    "C_diff": dfDiff["C"],
    "D_diff": dfDiff["D"],
}).set_index('N')

colors = ['red', 'slateblue', 'lightseagreen', 'orange']
colors_c = ['darkred', 'blue', 'darkgreen', 'darkorange']

# One stacked pair per series. The first call creates the axes (ax=None) and
# the later calls draw into it, each shifted to its own `position` slot.
ax = None
for _letter, _slot, _base, _top in zip("ABCD", (2, 1, 0, -1), colors, colors_c):
    ax = dfStacked.filter(like=_letter).plot.bar(
        stacked=True, ax=ax, position=_slot, width=.1,
        color=[_base, _top], edgecolor='w', alpha=.8)

ax.set_xlim(-.5, 3.5)
plt.legend(loc='upper center', ncol=4, bbox_to_anchor=(.5, 1.2))
plt.show()
Output:
I have a list of numpy arrays, each potentially having a different number of elements, such as:
[array([55]),
array([54]),
array([], dtype=float64),
array([48, 55]),]
I would like to plot this, where each array has an abscissa (x value) assigned, such as [1,2,3,4] so that the plot should show the following points: [[1,55], [2, 54], [4, 48], [4, 55]].
Is there a way I can do that with matplotlib? or how can I transform the data with numpy or pandas first so that it is can be plotted?
What you want to do is chain the original arrays and generate a new array of "abscissas". There are many ways to concatenate them; one of the most efficient is itertools.chain.
import itertools
from numpy import array
x = [array([55]), array([54]), array([]), array([48, 55])]
# Flatten all groups into one list of ordinates.
ys = list(itertools.chain.from_iterable(x))
# this will be [55, 54, 48, 55]
# generate abscissas: the 1-based position of a group, once per element in it
xs = [position for position, group in enumerate(x, start=1) for _ in group]
Now you can just plot easily with matplotlib as below
import matplotlib.pyplot as plt
# Draw markers only: without a format string plt.plot joins consecutive
# points with a line, whereas the question asks for the individual points.
plt.plot(xs, ys, 'o')
If you want to have different markers for different groups of data (the colours are automatically cycled by matplotlib):
import numpy as np
import matplotlib.pyplot as plt
markers = ['o',  # 'circle',
           'v',  # 'triangle_down',
           '^',  # 'triangle_up',
           '<',  # 'triangle_left',
           '>',  # 'triangle_right',
           '1',  # 'tri_down',
           '2',  # 'tri_up',
           '3',  # 'tri_left',
           '4',  # 'tri_right',
           '8',  # 'octagon',
           's',  # 'square',
           'p',  # 'pentagon',
           'h',  # 'hexagon1',
           'H',  # 'hexagon2',
           'D',  # 'diamond',
           'd',  # 'thin_diamond'
           ]
n_markers = len(markers)

# One random data set per marker, each with a random length of 0-9 values
# in [0, 10). `range` replaces the Python-2-only `xrange`.
a = [10. * np.random.random(int(np.random.random() * 10))
     for i in range(n_markers)]

fig = plt.figure()
ax = fig.add_subplot(111)
for i, data in enumerate(a):
    xs = data.shape[0] * [i]         # makes the abscissas list
    marker = markers[i % n_markers]  # picks a valid marker
    ax.plot(xs, data, marker, label='data %d, %s' % (i, marker))

# Extra room on the right leaves space for the legend.
ax.set_xlim(-1, 1.4 * len(a))
ax.set_ylim(0, 10)
ax.legend(loc=None)
fig.tight_layout()
Notice the limits to y scale are hard coded, change accordingly. The 1.4*len(a) is meant to leave room on the right side of the graph for the legend.
The example above has no point in the x=0 (would be dark blue circles) as the randomly assigned size for its data set was zero, but you can easily place a +1 if you don't want to use x=0.
Using pandas to create a numpy array with nans inserted when an array is empty or shorter than the longest array in the list...
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
arr_list = [
    np.array([55]),
    np.array([54]),
    np.array([], dtype='float64'),
    np.array([48, 55]),
]

# One row per input array; shorter or empty arrays are padded with NaN.
df = pd.DataFrame(arr_list)
list_len, repeats = df.shape

# Row-major flattening keeps the values of one array next to each other,
# so each 1-based row number is repeated once per column.
vals = df.to_numpy().flatten()
xax = np.repeat(np.arange(1, list_len + 1), repeats)

df_plot = pd.DataFrame({'xax': xax, 'vals': vals})
plt.scatter(df_plot['xax'], df_plot['vals']);
with x your list :
# Plot every group as dots at its list position. A plain loop replaces the
# list comprehension, which was used only for its side effects.
for i, group in enumerate(x):
    plt.plot(np.repeat(i, len(group)), group, '.')
plt.show()
#Alessandro Mariani's answer based on itertools made me think of another way to generate an array containing the data I needed. In some cases it may be more compact. It is also based on itertools.chain:
import itertools
from numpy import array
y = [array([55]), array([54]), array([]), array([48, 55])]
x = array([1, 2, 3, 4])
# Pair every abscissa with each value of its group (an empty group yields
# no pairs), then stack all pairs into one 2-column array.
_pairs = itertools.chain.from_iterable(
    itertools.product([t], n) for t, n in zip(x, y)
)
d = array(list(_pairs))
d is now the following array:
array([[ 1, 55],
[ 2, 54],
[ 4, 48],
[ 4, 55]])