How to tell `photutils` to plot only apertures which satisfy a condition? - astropy

I'm following an example in the photutils documentation to detect sources in an image:
from astropy.stats import sigma_clipped_stats
from photutils.datasets import load_star_image
import numpy as np
import matplotlib.pyplot as plt
from astropy.visualization import SqrtStretch
from astropy.visualization.mpl_normalize import ImageNormalize
from photutils.detection import DAOStarFinder
from photutils.aperture import CircularAperture
# Load image
hdu = load_star_image() # load a star image from the dataset
data = hdu.data[0:101, 0:101]
mean, median, std = sigma_clipped_stats(data, sigma = 3.0) # estimate noise
# Find stars in the image that have FWHMs of 3 pixels and peaks ~ 5 sigma > bg
daofind = DAOStarFinder(fwhm = 3.0, threshold = 5.*std)
sources = daofind(data - median)
# Print position and photometric data for each star in the image
for col in sources.colnames:
sources[col].info.format = '%.8g' # for consistent table output
positions = np.transpose((sources['xcentroid'], sources['ycentroid']))
apertures = CircularAperture(positions, r = 4.)
norm = ImageNormalize(stretch = SqrtStretch())
plt.imshow(data, cmap = 'Greys', origin = 'lower', norm = norm,
interpolation = 'nearest')
for i in range(len(sources)):
if sources[i][-1] < -2:
print(sources[i][-1])
apertures.plot(color = 'r', lw = 1.5, alpha = 0.5
Which produces
I've added the last four lines, with the intention to plot apertures around only the brightest stars. However, the for loop doesn't change the image. I understand why (it's plotting all apertures multiple times, once for each of the 4 stars with mag < -2), but how do I change it to plot them for only those stars?

Related

xarray : how to stack several pcolormesh figures above a map?

For a ML project I'm currently on, I need to verify if the trained data are good or not.
Let's say that I'm "splitting" the sky into several altitude grids (let's take 3 values for the moment) and for a given region (let's say, Europe).
One grid could be a signal reception strength (RSSI), another one the signal quality (RSRQ)
Each cell of the grid is therefor a rectangle and it has a mean value of each measurement (i.e. RSSI or RSRQ) performed in that area.
I have hundreds of millions of data
In the code below, I know how to draw a coloured mesh with xarray for each altitude: I just use xr.plot.pcolormesh(lat,lon, the_data_set); that's fine
But this will only give me a "flat" figure like this:
RSSI value at 3 different altitudes
I need to draw all the pcolormesh() of a dataset for each altitude in such way that:
1: I can have the map at the bottom
2: Each pcolormesh() is stacked and "displayed" at its altitude
3: I need to add a 3d scatter plot for testing my trained data
4: Need to be interactive as I have to zoom in areas
For 2 and 3 above, I managed to do something using plt and cartopy :
enter image description here
But plt/cartopy combination is not as interactive as plotly.
But plotly doesn't have the pcolormesh functionality
And still ... I don't know in anycase, how to "stack" the pcolormesh results that I did get above.
I've been digging Internet for few days but I didn't find something that could satisfy all my criteria.
What I did to get my pcolormesh:
import numpy as np
import xarray as xr
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
class super_data():
def __init__(self, lon_bound,lat_bound,alt_bound,x_points,y_points,z_points):
self.lon_bound = lon_bound
self.lat_bound = lat_bound
self.alt_bound = alt_bound
self.x_points = x_points
self.y_points = y_points
self.z_points = z_points
self.lon, self.lat, self.alt = np.meshgrid(np.linspace(self.lon_bound[0], self.lon_bound[1], self.x_points),
np.linspace(self.lat_bound[0], self.lat_bound[1], self.y_points),
np.linspace(self.alt_bound[0], self.alt_bound[1], self.z_points))
self.this_xr = xr.Dataset(
coords={'lat': (('latitude', 'longitude','altitude'), self.lat),
'lon': (('latitude', 'longitude','altitude'), self.lon),
'alt': (('latitude', 'longitude','altitude'), self.alt)})
def add_data_array(self,ds_name,ds_min,ds_max):
def create_temp_data(ds_min,ds_max):
data = np.random.randint(ds_min,ds_max,size=self.y_points * self.x_points)
return data
temp_data = []
# Create "z_points" number of layers in the z axis
for i in range(self.z_points):
temp_data.append(create_temp_data(ds_min,ds_max))
data = np.concatenate(temp_data)
data = data.reshape(self.z_points,self.x_points, self.y_points)
self.this_xr[ds_name] = (("altitude","longitude","latitude"),data)
def plot(self,dataset, extent=None, plot_center=False):
# I want t
if np.sqrt(self.z_points) == np.floor(np.sqrt(self.z_points)):
side_size = int(np.sqrt(self.z_points))
else:
side_size = int(np.floor(np.sqrt(self.z_points) + 1))
fig = plt.figure()
i_ax=1
for i in range(side_size):
for j in range(side_size):
if i_ax < self.z_points+1:
this_dataset = self.this_xr[dataset].sel(altitude=i_ax-1)
# Initialize figure with subplots
ax = fig.add_subplot(side_size, side_size, i_ax, projection=ccrs.PlateCarree())
i_ax += 1
ax.coastlines()
this_dataset.plot.pcolormesh('lon', 'lat', ax=ax, infer_intervals=True, alpha=0.5)
else:
break
plt.tight_layout()
plt.show()
if __name__ == "__main__":
# Wanted coverage :
lons = [-15, 30]
lats = [35, 65]
alts = [1000, 5000]
xarr = super_data(lons,lats,alts,10,8,3)
# Add some fake data
xarr.add_data_array("RSSI",-120,-60)
xarr.add_data_array("pressure",700,1013)
xarr.plot("RSSI",0)
Thanks for you help

Adding Labels to a Shapefile Map

I have a shapefile that maps the world to sales territories. The shapefile records lists the sales territory code and name. I would like to be able to add the territory code in the center of the region, but to do using ax.text, I need the center point of the region. Any ideas how to do this?
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import shapefile
from mpl_toolkits.basemap import Basemap as Basemap
from matplotlib.colors import rgb2hex, Normalize
from matplotlib.patches import Polygon
from matplotlib.colorbar import ColorbarBase
from matplotlib.collections import PatchCollection
plt.rcParams['figure.figsize'] = [16,12]
fig = plt.figure()
m = Basemap(llcrnrlon=-121,llcrnrlat=20,urcrnrlon=-62,urcrnrlat=51,
projection='lcc',lat_1=32,lat_2=45,lon_0=-95)
shp_info = m.readshapefile('world_countries_boundary_file_world_2002','countries',drawbounds=True)
sf = shapefile.Reader('territory_map') # Custom file mapping out territories
recs = sf.records()
shapes = sf.shapes()
Nshp = len(shapes)
colors={}
territory_codes=[]
cmap = plt.cm.RdYlGn
# details is a pandas datafile with column "DELTA" that has data to plot
vmin = details.DELTA.min()
vmax = details.DELTA.max()
norm = Normalize(vmin=vmin, vmax=vmax)
for index,row in details.iterrows():
colors[row['TERRITORY_CODE']] = cmap((row['DELTA']-vmin)/(vmax-vmin))[:3]
territory_codes.append(row['TERRITORY_CODE'])
ax = fig.add_subplot(111)
for nshp in range(Nshp):
ptchs = []
pts = np.array((shapes[nshp].points))
prt = shapes[nshp].parts
par = list(prt) + [pts.shape[0]]
for pij in range(len(prt)):
ptchs.append(Polygon(pts[par[pij]:par[pij+1]]))
try:
color = rgb2hex(colors[recs[nshp][0]])
except:
color = 'w' # If no data, leave white (blank)
ax.add_collection(PatchCollection(ptchs, facecolor=color, edgecolor='b', linewidths=.7))
x, y = # Coordinates that are center of region
ax.text(x, y, recs[nshp][0]) # <---- this would be the text to add
# Add colorbar
ax_c = fig.add_axes([0.1, 0.1, 0.8, 0.02])
cb = ColorbarBase(ax_c,cmap=cmap,norm=norm,orientation='horizontal')
cb.ax.set_xlabel('Daily Change, USD')
#Set view to United States
ax.set_xlim(-150,-40)
ax.set_ylim(15,70)
plt.show()
Resulting Map of Code without Territory Names
you're probably looking to take the mean of all the x coordinates and the mean of all the y coordinates of your polygon shape.
I can't test this but it could look something like this:
x,y = pts[0].mean(), pts[1].mean()
or this:
x,y = pts[:,0].mean(), pts[:,1].mean()
depending on the dimensions of your numpy array.

Plotting Lat/Long Points Using Basemap

I am trying to plot points on a map using matplotlib and Basemap, where the points represent the lat/long for specific buildings. My map does indeed plot the points, but puts them in the wrong location. When I use the same data and do the same thing using Bokeh, instead of matplotlib and basemap, I get the correct plot.
Here is the CORRECT result in Bokeh:
Bokeh Version
And here is the INCORRECT result in Basemap:
Basemap Version
I have seen discussion elsewhere on StackOverflow that suggested this might be related to the fact that plot() "shifts" the longitude somehow. I've tried the suggestion from there, which was to include the line:
lons, lats = m.shiftdata(long, lat)
and then use the shifted data. That didn't have any visible impact.
My full sample code which generates both of the plots in Basemap and Bokeh is here:
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.sampledata.us_states import data as states
from bokeh.models import ColumnDataSource, Range1d
# read in data to use for plotted points
buildingdf = pd.read_csv('buildingdata.csv')
lat = buildingdf['latitude'].values
long = buildingdf['longitude'].values
# determine range to print based on min, max lat and long of the data
margin = .2 # buffer to add to the range
lat_min = min(lat) - margin
lat_max = max(lat) + margin
long_min = min(long) - margin
long_max = max(long) + margin
# create map using BASEMAP
m = Basemap(llcrnrlon=long_min,
llcrnrlat=lat_min,
urcrnrlon=long_max,
urcrnrlat=lat_max,
lat_0=(lat_max - lat_min)/2,
lon_0=(long_max-long_min)/2,
projection='merc',
resolution = 'h',
area_thresh=10000.,
)
m.drawcoastlines()
m.drawcountries()
m.drawstates()
m.drawmapboundary(fill_color='#46bcec')
m.fillcontinents(color = 'white',lake_color='#46bcec')
# convert lat and long to map projection coordinates
lons, lats = m(long, lat)
# plot points as red dots
m.scatter(lons, lats, marker = 'o', color='r')
plt.show()
# create map using Bokeh
source = ColumnDataSource(data = dict(lat = lat,lon = long))
# get state boundaries
state_lats = [states[code]["lats"] for code in states]
state_longs = [states[code]["lons"] for code in states]
p = figure(
toolbar_location="left",
plot_width=1100,
plot_height=700,
)
# limit the view to the min and max of the building data
p.y_range = Range1d(lat_min, lat_max)
p.x_range = Range1d(long_min, long_max)
p.xaxis.visible = False
p.yaxis.visible = False
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
p.patches(state_longs, state_lats, fill_alpha=0.0,
line_color="black", line_width=2, line_alpha=0.3)
p.circle(x="lon", y="lat", source = source, size=4.5,
fill_color='red',
line_color='grey',
line_alpha=.25
)
show(p)
I don't have enough reputation points to post a link to the data or to include it here.
In the basemap plot the scatter points are hidden behind the fillcontinents. Removing the two lines
#m.drawmapboundary(fill_color='#46bcec')
#m.fillcontinents(color = 'white',lake_color='#46bcec')
would show you the points. Because this might be undesired, the best solution would be to place the scatter on top of the rest of the map by using the zorder argument.
m.scatter(lons, lats, marker = 'o', color='r', zorder=5)
Here is the complete code (and I would like to ask you to include this kind of runnable minimal example with hardcoded data next time asking a question, as it saves everyone a lot of work inventing the data oneself):
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import pandas as pd
import io
u = u"""latitude,longitude
42.357778,-71.059444
39.952222,-75.163889
25.787778,-80.224167
30.267222, -97.763889"""
# read in data to use for plotted points
buildingdf = pd.read_csv(io.StringIO(u), delimiter=",")
lat = buildingdf['latitude'].values
lon = buildingdf['longitude'].values
# determine range to print based on min, max lat and lon of the data
margin = 2 # buffer to add to the range
lat_min = min(lat) - margin
lat_max = max(lat) + margin
lon_min = min(lon) - margin
lon_max = max(lon) + margin
# create map using BASEMAP
m = Basemap(llcrnrlon=lon_min,
llcrnrlat=lat_min,
urcrnrlon=lon_max,
urcrnrlat=lat_max,
lat_0=(lat_max - lat_min)/2,
lon_0=(lon_max-lon_min)/2,
projection='merc',
resolution = 'h',
area_thresh=10000.,
)
m.drawcoastlines()
m.drawcountries()
m.drawstates()
m.drawmapboundary(fill_color='#46bcec')
m.fillcontinents(color = 'white',lake_color='#46bcec')
# convert lat and lon to map projection coordinates
lons, lats = m(lon, lat)
# plot points as red dots
m.scatter(lons, lats, marker = 'o', color='r', zorder=5)
plt.show()

Percentile Distribution Graph

Does anyone have an idea how to change X axis scale and ticks to display a percentile distribution like the graph below? This image is from MATLAB, but I want to use Python (via Matplotlib or Seaborn) to generate.
From the pointer by #paulh, I'm a lot closer now. This code
import matplotlib
matplotlib.use('Agg')
import numpy as np
import matplotlib.pyplot as plt
import probscale
import seaborn as sns
clear_bkgd = {'axes.facecolor':'none', 'figure.facecolor':'none'}
sns.set(style='ticks', context='notebook', palette="muted", rc=clear_bkgd)
fig, ax = plt.subplots(figsize=(8, 4))
x = [30, 60, 80, 90, 95, 97, 98, 98.5, 98.9, 99.1, 99.2, 99.3, 99.4]
y = np.arange(0, 12.1, 1)
ax.set_xlim(40, 99.5)
ax.set_xscale('prob')
ax.plot(x, y)
sns.despine(fig=fig)
Generates the following plot (notice the re-distributed X-Axis)
Which I find much more useful than a the standard scale:
I contacted the author of the original graph and they gave me some pointers. It is actually a log scale graph, with x axis reversed and values of [100-val], with manual labeling of the x axis ticks. The code below recreates the original image with the same sample data as the other graphs here.
import matplotlib
matplotlib.use('Agg')
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
clear_bkgd = {'axes.facecolor':'none', 'figure.facecolor':'none'}
sns.set(style='ticks', context='notebook', palette="muted", rc=clear_bkgd)
x = [30, 60, 80, 90, 95, 97, 98, 98.5, 98.9, 99.1, 99.2, 99.3, 99.4]
y = np.arange(0, 12.1, 1)
# Number of intervals to display.
# Later calculations add 2 to this number to pad it to align with the reversed axis
num_intervals = 3
x_values = 1.0 - 1.0/10**np.arange(0,num_intervals+2)
# Start with hard-coded lengths for 0,90,99
# Rest of array generated to display correct number of decimal places as precision increases
lengths = [1,2,2] + [int(v)+1 for v in list(np.arange(3,num_intervals+2))]
# Build the label string by trimming on the calculated lengths and appending %
labels = [str(100*v)[0:l] + "%" for v,l in zip(x_values, lengths)]
fig, ax = plt.subplots(figsize=(8, 4))
ax.set_xscale('log')
plt.gca().invert_xaxis()
# Labels have to be reversed because axis is reversed
ax.xaxis.set_ticklabels( labels[::-1] )
ax.plot([100.0 - v for v in x], y)
ax.grid(True, linewidth=0.5, zorder=5)
ax.grid(True, which='minor', linewidth=0.5, linestyle=':')
sns.despine(fig=fig)
plt.savefig("test.png", dpi=300, format='png')
This is the resulting graph:
These type of graphs are popular in the low-latency community for plotting latency distributions. When dealing with latencies most of the interesting information tends to be in the higher percentiles, so a logarithmic view tends to work better. I've first seen these graphs used in https://github.com/giltene/jHiccup and https://github.com/HdrHistogram/.
The cited graph was generated by the following code
n = ceil(log10(length(values)));
p = 1 - 1./10.^(0:0.01:n);
percentiles = prctile(values, p * 100);
semilogx(1./(1-p), percentiles);
The x-axis was labelled with the code below
labels = cell(n+1, 1);
for i = 1:n+1
labels{i} = getPercentileLabel(i-1);
end
set(gca, 'XTick', 10.^(0:n));
set(gca, 'XTickLabel', labels);
% {'0%' '90%' '99%' '99.9%' '99.99%' '99.999%' '99.999%' '99.9999%'}
function label = getPercentileLabel(i)
switch(i)
case 0
label = '0%';
case 1
label = '90%';
case 2
label = '99%';
otherwise
label = '99.';
for k = 1:i-2
label = [label '9'];
end
label = [label '%'];
end
end
The following Python code uses Pandas to read a csv file that contains a list of recorded latency values (in milliseconds), then it records those latency values (as microseconds) in an HdrHistogram, and saves the HdrHistogram to an hgrm file, that will then be used by Seaborn to plot the latency distribution graph.
import pandas as pd
from hdrh.histogram import HdrHistogram
from hdrh.dump import dump
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import sys
import argparse
# Parse the command line arguments.
parser = argparse.ArgumentParser()
parser.add_argument('csv_file')
parser.add_argument('hgrm_file')
parser.add_argument('png_file')
args = parser.parse_args()
csv_file = args.csv_file
hgrm_file = args.hgrm_file
png_file = args.png_file
# Read the csv file into a Pandas data frame and generate an hgrm file.
csv_df = pd.read_csv(csv_file, index_col=False)
USECS_PER_SEC=1000000
MIN_LATENCY_USECS = 1
MAX_LATENCY_USECS = 24 * 60 * 60 * USECS_PER_SEC # 24 hours
# MAX_LATENCY_USECS = int(csv_df['response-time'].max()) * USECS_PER_SEC # 1 hour
LATENCY_SIGNIFICANT_DIGITS = 5
histogram = HdrHistogram(MIN_LATENCY_USECS, MAX_LATENCY_USECS, LATENCY_SIGNIFICANT_DIGITS)
for latency_sec in csv_df['response-time'].tolist():
histogram.record_value(latency_sec*USECS_PER_SEC)
# histogram.record_corrected_value(latency_sec*USECS_PER_SEC, 10)
TICKS_PER_HALF_DISTANCE=5
histogram.output_percentile_distribution(open(hgrm_file, 'wb'), USECS_PER_SEC, TICKS_PER_HALF_DISTANCE)
# Read the generated hgrm file into a Pandas data frame.
hgrm_df = pd.read_csv(hgrm_file, comment='#', skip_blank_lines=True, sep=r"\s+", engine='python', header=0, names=['Latency', 'Percentile'], usecols=[0, 3])
# Plot the latency distribution using Seaborn and save it as a png file.
sns.set_theme()
sns.set_style("dark")
sns.set_context("paper")
sns.set_color_codes("pastel")
fig, ax = plt.subplots(1,1,figsize=(20,15))
fig.suptitle('Latency Results')
sns.lineplot(x='Percentile', y='Latency', data=hgrm_df, ax=ax)
ax.set_title('Latency Distribution')
ax.set_xlabel('Percentile (%)')
ax.set_ylabel('Latency (seconds)')
ax.set_xscale('log')
ax.set_xticks([1, 10, 100, 1000, 10000, 100000, 1000000, 10000000])
ax.set_xticklabels(['0', '90', '99', '99.9', '99.99', '99.999', '99.9999', '99.99999'])
fig.tight_layout()
fig.savefig(png_file)

Image Boundary Finding and Filling

I have a single-band binary image (consisting of only 0 and 1 pixel values) as shown in the figure below.
I have to convert all the black pixels inside the outer white boundaries into whites.
The black pixels outside the outer white boundaries should remain black.
How would you do it?
The code below yields the following result:
I've commented the code inline to explain what I've done.
from skimage import io, img_as_bool, measure, morphology
from scipy import ndimage
import numpy as np
import matplotlib.pyplot as plt
# Read the image, convert the values to True or False;
# discard all but the red channel (since it's a black and
# white image, they're all the same)
image = img_as_bool(io.imread('borders.png'))[..., 0]
# Compute connected regions in the image; we're going to use this
# to find centroids for our watershed segmentation
labels = measure.label(image)
regions = measure.regionprops(labels)
# Marker locations for the watershed segmentation; we choose these to
# be the centroids of the different connected regions in the image
markers = np.array([r.centroid for r in regions]).astype(np.uint16)
marker_image = np.zeros_like(image, dtype=np.int64)
marker_image[markers[:, 0], markers[:, 1]] = np.arange(len(markers)) + 1
# Compute the distance map, which provides a "landscape" for our watershed
# segmentation
distance_map = ndimage.distance_transform_edt(1 - image)
# Compute the watershed segmentation; it will over-segment the image
filled = morphology.watershed(1 - distance_map, markers=marker_image)
# In the over-segmented image, combine touching regions
filled_connected = measure.label(filled != 1, background=0) + 1
# In this optional step, filter out all regions that are < 25% the size
# of the mean region area found
filled_regions = measure.regionprops(filled_connected)
mean_area = np.mean([r.area for r in filled_regions])
filled_filtered = filled_connected.copy()
for r in filled_regions:
if r.area < 0.25 * mean_area:
coords = np.array(r.coords).astype(int)
filled_filtered[coords[:, 0], coords[:, 1]] = 0
# And display!
f, (ax0, ax1, ax2) = plt.subplots(1, 3)
ax0.imshow(image, cmap='gray')
ax1.imshow(filled_filtered, cmap='spectral')
ax2.imshow(distance_map, cmap='gray')
plt.savefig('/tmp/labeled_filled_regions.png', bbox_inches='tight')