I really like the simplicity with which ipywidgets.interactive works with a pandas DataFrame, but I am having trouble getting the data when a point in a scatter plot is selected.
I have looked at some examples that use matplotlib.widgets etc. but none that use it with interactive in Jupyter. It looks like this technique would be described here but it comes up just short:
http://minrk-ipywidgets.readthedocs.io/en/latest/examples/Using%20Interact.html
Here is an ipynb of what I am trying to accomplish:
from ipywidgets import interactive
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.widgets import Button
from matplotlib.text import Annotation
from io import StringIO
# Space-separated sample measurements; the first row holds the column names.
data_ssv = """tone_amp_0 tone_freq_0 SNR
75.303 628.0 68.374
84.902 8000.0 61.292
92.856 288.0 70.545
70.000 2093.0 35.036
76.511 6834.0 66.952 """
# Raw string for the regex separator: "\s" inside a plain string literal is an
# invalid escape sequence and triggers a warning on recent Python versions.
data = pd.read_table(StringIO(data_ssv), sep=r"\s+", header=0)
col_names = list(data.columns.values)
# Column names offered as choices for the X / Y / Z selectors.
plottable_col = ['tone_amp_0', 'tone_freq_0', 'SNR']
def annotate(axis, text, x, y):
    """Add ``text`` to ``axis`` as an annotation at data coordinates (x, y)."""
    axis.add_artist(Annotation(text, xy=(x, y), xycoords='data'))
def onpick(event):
    """Annotate every point reported by a matplotlib pick event.

    ``event.ind`` lists the indices of the picked points (several when the
    dots lie close together); each one is logged and labelled at the mouse
    position, shifted slightly so the labels do not sit on top of each other.
    """
    label_pos_x = event.mouseevent.xdata
    label_pos_y = event.mouseevent.ydata
    # Annotate the axes that own the picked artist rather than whatever
    # pyplot currently considers the "current" axes.
    ax = event.artist.axes
    offset = 0  # grows per label so that neighbouring labels do not overlap
    for i in event.ind:
        label = "gen_labels"  # generated_labels[i]
        print("index", i, label)  # log it for debugging purposes
        annotate(ax, label, label_pos_x + offset, label_pos_y + offset)
        offset += 0.01
    # A single redraw request after all annotations have been added.
    ax.figure.canvas.draw_idle()
def update_plot(X='tone_amp_0', Y='tone_freq_0', Z='SNR'):
    """Draw a pickable scatter plot of column ``X`` against column ``Y``.

    X, Y: names of the ``data`` columns used for the horizontal and vertical
        axes.
    Z: name of the ``data`` column that colours the points and labels the
        colorbar.
    """
    plt.scatter(data[X], data[Y], marker='.', edgecolors='none',
                c=data[Z], picker=True, cmap='RdYlGn')
    plt.title(X + ' vs ' + Y)
    plt.xlabel(X)
    plt.ylabel(Y)
    plt.colorbar().set_label(Z, labelpad=+1)
    plt.grid()
    # Connect the handler while the figure holding the scatter is still the
    # current one; after show() plt.gcf() may refer to a different figure.
    # NOTE(review): pick events also need an interactive backend
    # (e.g. %matplotlib notebook / widget) - confirm the notebook setup.
    plt.gcf().canvas.mpl_connect('pick_event', onpick)
    plt.show()
interactive(update_plot, X=plottable_col, Y=plottable_col, Z=plottable_col)
When I select a data point nothing is happening. Not sure how to debug this or understand what I am doing wrong. Can someone point out what I am doing wrong here?
Try putting a semicolon at the end of plt.gcf().canvas.mpl_connect('pick_event', onpick).
Related
I am trying to find an equivalent of hist()'s figsize and layout parameters for sns.pairplot().
I have a pairplot that gives me nice scatterplots between the X's and y. However, it is oriented horizontally and there is no equivalent layout parameter to make them vertical to my knowledge. 4 plots per row would be great.
This is my current sns.pairplot():
sns.pairplot(X_train,
x_vars = X_train.select_dtypes(exclude=['object']).columns,
y_vars = ["SalePrice"])
This is what I would like it to look like: Source
num_mask = train_df.dtypes != object
num_cols = train_df.loc[:, num_mask[num_mask == True].keys()]
num_cols.hist(figsize = (30,15), layout = (4,10))
plt.show()
What you want to achieve isn't currently supported by sns.pairplot, but you can use one of the other figure-level functions (sns.displot, sns.catplot, ...). sns.lmplot creates a grid of scatter plots. For this to work, the dataframe needs to be in "long form".
Here is a simple example. sns.lmplot has parameters to leave out the regression line (fit_reg=False), to set the height of the individual subplots (height=...), to set its aspect ratio (aspect=..., where the subplot width will be height times aspect ratio), and many more. If all y ranges are similar, you can use the default sharey=True.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# create some test data with different y-ranges
np.random.seed(20230209)
X_train = pd.DataFrame({"".join(np.random.choice([*'uvwxyz'], np.random.randint(3, 8))):
np.random.randn(100).cumsum() + np.random.randint(100, 1000) for _ in range(10)})
X_train['SalePrice'] = np.random.randint(10000, 100000, 100)
# convert the dataframe to long form
# 'SalePrice' will get excluded automatically via `melt`
compare_columns = X_train.select_dtypes(exclude=['object']).columns
long_df = X_train.melt(id_vars='SalePrice', value_vars=compare_columns)
# create a grid of scatter plots
g = sns.lmplot(data=long_df, x='SalePrice', y='value', col='variable', col_wrap=4, sharey=False)
g.set(ylabel='')
plt.show()
Here is another example, with histograms of the mpg dataset:
import matplotlib.pyplot as plt
import seaborn as sns
mpg = sns.load_dataset('mpg')
compare_columns = mpg.select_dtypes(exclude=['object']).columns
mpg_long = mpg.melt(value_vars=compare_columns)
g = sns.displot(data=mpg_long, kde=True, x='value', common_bins=False, col='variable', col_wrap=4, color='crimson',
facet_kws={'sharex': False, 'sharey': False})
g.set(xlabel='')
plt.show()
When I run this code
import Scientific.IO.NetCDF as S
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
import xarray as xr
import metpy
import numpy as N
from metpy.plots import ContourPlot, ImagePlot, MapPanel, PanelContainer
# Any import of metpy will activate the accessors
import metpy.calc as mpcalc
#from metpy.testing import get_test_data
from metpy.units import units
# Open the netCDF file as an xarray Dataset
#
datadir='C:/Users/stratus/AppData/Local/lxss/home/stratus/PROJECT/NEWPROJECT/FEB012017/nam_218_20170131_1200_000.nc'
data = xr.open_dataset(datadir,decode_cf=True)
# To parse the full dataset, we can call parse_cf without an argument, and assign the returned
# Dataset.
data = data.metpy.parse_cf()
tempatt=data['TMP_P0_L100_GLC0'].attrs
# If we instead want just a single variable, we can pass that variable name to parse_cf and
# it will return just that data variable as a DataArray.
data_var = data.metpy.parse_cf('TMP_P0_L100_GLC0')
# To rename variables, supply a dictionary between old and new names to the rename method.
# rename() returns a new Dataset (current xarray no longer accepts ``inplace``),
# so rebind the result.
data = data.rename({
    'TMP_P0_L100_GLC0': 'temperature',
})
# NOTE(review): depending on the MetPy version, convert_units may return a
# converted DataArray instead of converting in place - confirm and assign the
# result back if needed.
data['temperature'].metpy.convert_units('degC')
# Get multiple coordinates (for example, in just the x and y direction)
x, y = data['temperature'].metpy.coordinates('x', 'y')
# If we want to get just a single coordinate from the coordinates method, we have to use
# tuple unpacking because the coordinates method returns a generator
vertical, = data['temperature'].metpy.coordinates('vertical')
data_crs = data['temperature'].metpy.cartopy_crs
# Or, we can just get a coordinate from the property
#time = data['temperature'].metpy.time
# To verify, we can inspect all their names
#print([coord.name for coord in (x, y, vertical, time)])
#
#heights = data['height'].metpy.loc[{'time': time[0], 'vertical': 850. * units.hPa}]
#lat, lon = xr.broadcast(y, x)
#f = mpcalc.coriolis_parameter(lat)
#dx, dy = mpcalc.grid_deltas_from_dataarray(heights)
#u_geo, v_geo = mpcalc.geostrophic_wind(heights, f, dx, dy)
#print(u_geo)
#print(v_geo)
fig = plt.figure(1)
# A very simple example of a map of the 850 hPa temperature field
data_crs = data['temperature'].metpy.cartopy_crs
ax = plt.axes(projection=ccrs.LambertConformal())
data['temperature'].metpy.loc[{'vertical': 850. * units.hPa}].plot(ax=ax, transform=data_crs)
ax.add_feature(cfeature.LAND)
ax.add_feature(cfeature.OCEAN)
ax.add_feature(cfeature.COASTLINE)
#ax.set_extent([-120,-80,20,50])
# Set the title on the map axes before the single show() call; a title set
# after show() would be applied to a new, empty figure.
ax.set_title("850 mb Temperature")
#plt.suptitle("Metpy Test")
plt.show()
I had to edit the code as per some of the answers but I am getting a mostly blank map now. 850 T Map fail I am mainly trying to have the temperatures at 850 mb overlap the US so I could show it to a friend to practice for a project I am helping him with. The filling of the parentheses for the data helped a bit which is why I edited it.
As pointed out in the comments it is difficult to answer without a reproducible example. However, the following may solve your issue:
data_crs = data['temperature'].metpy.cartopy_crs
ax = plt.axes(projection=ccrs.LambertConformal())
data['temperature'].metpy.loc[{'vertical': 1000. * units.hPa}].plot(ax=ax, transform=data_crs)
ax.add_feature(cfeature.LAND)
ax.add_feature(cfeature.OCEAN)
ax.add_feature(cfeature.COASTLINE)
plt.show()
I use bokeh in an ipython notebook and would like to have a button next to a plot to switch on or off labels of the data points. I found a solution using IPython.html.widgets.interact, but this solution resets the plot for each update including zooming and padding
This is the minimal working code example:
from numpy.random import random
from bokeh.plotting import figure, show, output_notebook
from IPython.html.widgets import interact
def plot(label_flag):
    """Create and show a scatter plot of 10 random points.

    label_flag: when truthy, each point is labelled with its index.

    Every call builds a new figure with newly drawn random data.
    """
    p = figure()
    N = 10
    x = random(N)+2
    y = random(N)+2
    labels = range(N)
    p.scatter(x, y)
    # Draw the labels only when requested (the stray ``pass`` was dead code).
    if label_flag:
        p.text(x, y, labels)
    output_notebook()
    show(p)
interact(plot, label_flag=True)
p.s. If there is an easy way to do this in matplotlib I would also switch back again.
By using bokeh.models.ColumnDataSource to store and change the plot's data I was able to achieve what I wanted.
One caveat is that I found no way to make it work without a refresh unless output_notebook is called twice, in two different cells. If I remove one of the two output_notebook calls, either the GUI of the tools button looks broken or changing a setting again results in a reset of the plot.
from numpy.random import random
from bokeh.plotting import figure, show, output_notebook
from IPython.html.widgets import interact
from bokeh.models import ColumnDataSource
output_notebook()
## <-- new cell -->
p = figure()
N = 10
x_data = random(N)+2
y_data = random(N)+2
labels = range(N)
source = ColumnDataSource(
data={
'x':x_data,
'y':y_data,
'desc':labels
}
)
p.scatter('x', 'y', source=source)
p.text('x', 'y', 'desc', source=source)
output_notebook()
def update_plot(label_flag=True):
    """Switch the point labels on or off via the shared data source, then show the plot.

    label_flag: when truthy the 'desc' column holds the point indices,
        otherwise it holds empty strings.
    """
    source.data['desc'] = range(N) if label_flag else [''] * N
    show(p)
interact(update_plot, label_flag=True)
My users sometimes wish to see log scaling of the values of a 2-d plot, even though the data spans less than one decade. I'm able to make plots using 'pcolormesh' or 'imshow' using the
norm=LogNorm(vmin=minimum,vmax=maximum)
parameter and accurately show log scaled 'intensity' values. I would like the 'colorbar' to show some minor ticks and tick labels, but when minimum and maximum span less than a decade, no matter what I do there is only one tick value displayed. I tried the suggestion in this SO posting:
Minor ticks in matplotlib's colorbar
As adapted in the following snippet:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
# fill grid
x = np.linspace(1,10,10)
y = np.linspace(1,10,10)
X, Y = np.meshgrid(x,y)
Z = np.abs(X/10 + Y/10)
# plot
f, ax = plt.subplots()
# The colour limits go into the norm itself: recent Matplotlib versions reject
# ``norm=`` combined with separate ``vmin``/``vmax`` arguments.
p = plt.pcolormesh(X, Y, Z, norm=LogNorm(vmin=2e-1, vmax=1))
cb = plt.colorbar(p, ax=ax)
cb.ax.minorticks_on()
plt.show()
But there are no minor ticks, labeled or otherwise:
I have also tried the following:
from matplotlib.ticker import LogFormatterMathtext
from matplotlib.ticker import LogLocator
from matplotlib.ticker import LogFormatter
import numpy as nmp
import matplotlib.pyplot as pyp
'''
<snip>
'''
ccbb=pyp.colorbar(label='ohms')
ccbb.ax.yaxis.set_minor_locator(LogLocator(subs=nmp.arange(2,10)))
# AND/OR
# ccbb.ax.yaxis.set_minor_locator(LogLocator(subs=[0.2,0.5,1.0]))
ccbb.ax.yaxis.set_minor_formatter(LogFormatterMathtext())
ccbb.update_ticks()
'''
<snip>
'''
And several other things, which I haven't saved. All of which yield the same result with the colorbar missing any but the single decade tick / label. The documentation for the ticker class is pretty impenetrable:
http://matplotlib.org/api/ticker_api.html
Especially the following statement about LogFormatter parameter labelOnlyBase:
"base is used to locate the decade tick, which will be the only one to be labeled if labelOnlyBase is False" Neither False nor True cause more than the base to be ticked, I suppose that's because this refers to the Major ticks, But why in the world can't I get the minor ticks or labels??
Any advice would be appreciated.
Matplotlib colorbars don't seem to do minor ticks in log scale. Using the method in this answer works, though it's a bit inconvenient - one day this will be automatic, but for now you have to organise the minor tick values by hand (np.arange(2, 10)/10. in this case, but you'd have to append np.arange(2, 10) if your values went up to 10)
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
# fill grid
x = np.linspace(1,10,10)
y = np.linspace(1,10,10)
X, Y = np.meshgrid(x,y)
# Z = np.abs(X + Y)
Z = np.abs(X/10 + Y/10)
# plot
f, ax = plt.subplots()
# The colour limits go into the norm itself: recent Matplotlib versions reject
# ``norm=`` combined with separate ``vmin``/``vmax`` arguments.
p = plt.pcolormesh(X, Y, Z, norm=LogNorm(vmin=2e-1, vmax=1))
cb = plt.colorbar(p, ax=ax)
# cb.ax.minorticks_on()
# We need to normalize the tick locations so that they're in the range from 0-1...
# NOTE(review): newer Matplotlib colorbar axes appear to use data units, in
# which case the raw values (np.arange(2, 10)/10.) should be passed instead -
# confirm against the installed version.
minorticks = p.norm(np.arange(2, 10)/10.)
cb.ax.yaxis.set_ticks(minorticks, minor=True)
plt.show()
The minorticks_on() method wasn't doing anything, so I commented it out.
In R, there is a function locator which is like Matlab's ginput where you can click on the figure with a mouse and select any x,y coordinate. In addition, there is a function called identify(x,y) where if you give it a set of points x,y that you have plotted and then click on the figure, it will return the index of the x,y point which lies nearest (within an adjustable tolerance) to the location you have selected (or multiple indices, if multiple points are selected). Is there such a functionality in Matplotlib?
You may want to use a pick event :
fig = figure()
ax1 = fig.add_subplot(111)
ax1.set_title('custom picker for line data')
line, = ax1.plot(rand(100), rand(100), 'o', picker=line_picker)
fig.canvas.mpl_connect('pick_event', onpick2)
Tolerance set by picker parameter there:
line, = ax1.plot(rand(100), 'o', picker=5) # 5 points tolerance
from __future__ import print_function
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from matplotlib.patches import Rectangle
from matplotlib.text import Text
from matplotlib.image import AxesImage
import numpy as np
from numpy.random import rand
if 1:
fig, ax = plt.subplots()
ax.set_title('click on points', picker=True)
ax.set_ylabel('ylabel', picker=True, bbox=dict(facecolor='red'))
line, = ax.plot(rand(100), 'o', picker=5)
def onpick1(event):
    """Print the x and y data values of the first picked point of a line.

    Pick events whose artist is not a ``Line2D`` are ignored.
    """
    if not isinstance(event.artist, Line2D):
        return
    thisline = event.artist
    xdata = thisline.get_xdata()
    ydata = thisline.get_ydata()
    ind = event.ind
    # print() calls: the snippet imports print_function, under which the
    # Python 2 statement form is a SyntaxError.
    print('X=' + str(np.take(xdata, ind)[0]))  # Print X point
    print('Y=' + str(np.take(ydata, ind)[0]))  # Print Y point
fig.canvas.mpl_connect('pick_event', onpick1)
Wow, many years have passed! Now matplotlib also supports the ginput function, which has almost the same API as Matlab's. So there is no need to hack with mpl_connect and so on any more! (https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.ginput.html) For instance,
plt.ginput(4)
will let the user select 4 points.
The ginput() is a handy tool to select x, y coordinates of any random point from a plotted window, however that point may not belong to the plotted data. To select x, y coordinates of a point from the plotted data, an efficient tool still is to use 'pick_event' property with mpl_connect as the example given in the documentation. For example:
import matplotlib.pyplot as plt
import numpy as np
from numpy.random import rand
fig, ax = plt.subplots()
ax.plot(rand(100), rand(100), picker=3)
# 3, for example, is tolerance for picker i.e, how far a mouse click from
# the plotted point can be registered to select nearby data point/points.
def on_pick(event):
    """Print the (x, y) data coordinates of the points hit by a pick event.

    ``event.artist`` must provide ``get_data()`` returning the x and y arrays;
    ``event.ind`` holds the indices of the picked points.
    """
    line = event.artist
    xdata, ydata = line.get_data()
    # The picked indices come from the event; without this lookup ``ind`` is
    # undefined and the handler raises NameError.
    ind = event.ind
    print('selected point is:', np.array([xdata[ind], ydata[ind]]).T)
cid = fig.canvas.mpl_connect('pick_event', on_pick)
The last line above connects the plot to the 'pick_event', and the coordinates of the nearest plotted points will be printed after each mouse click on the plot. To end this process, we need to use mpl_disconnect:
fig.canvas.mpl_disconnect(cid)