How to export TFRecords from pts label files for tensorflow object detection api?

Usually, when we generate TFRecords from XML label files (from labelImg, for example), each label has xmin, xmax, ymin and ymax values, which describe an axis-aligned rectangular box.
We can build a CSV file from those values and generate the TFRecords from it.
But in the case of .pts files, the bounding box is given as four corner points rather than as min/max values, e.g.:
bounding_box: 534.588998862 232.095176337; 101.596234357 388.45367463; 51.3295676906 249.25367463; 484.322332196 92.8951763367
So there are four (x, y) points, not just the two corners that labelImg gives.
Can someone explain to me how to generate a TFRecord from .pts files?

In case anyone else has the same question: I wrote a script that converts those four points into an axis-aligned rectangle (xmin, xmax, ymin, ymax), so the TFRecord can be generated just as it would be from labelImg XML files.
Here it is:
import os
import glob
import pandas as pd
from PIL import Image
import csv
# Output CSV path, kept exactly as in the original script.
# NOTE(review): 'name' looks like a placeholder -- rename (e.g. to
# "labels.csv") to whatever your TFRecord generator reads.
OUTPUT_CSV = r'name'

# NOTE(review): column order follows the common labelImg xml_to_csv layout;
# adjust it if your TFRecord script expects different columns.
CSV_HEADER = ["filename", "width", "height", "class",
              "xmin", "ymin", "xmax", "ymax"]


def bounding_box_extents(line):
    """Return ``(xmin, ymin, xmax, ymax)`` for one bounding-box line.

    ``line`` looks like ``"bounding_box: x1 y1; x2 y2; x3 y3; x4 y4"``.
    The ``bounding_box:`` prefix is optional, corners are separated by
    ``;`` and the two coordinates of a corner by any amount of whitespace.
    Each extent is truncated to an ``int``.

    Raises:
        ValueError: if no corner is found, a corner does not consist of
            exactly two values, or a value is not a number.
    """
    payload = line.split(":", 1)[-1]
    corners = [part.split() for part in payload.split(";") if part.strip()]
    if not corners or any(len(corner) != 2 for corner in corners):
        raise ValueError("cannot parse bounding box from: {!r}".format(line))
    xs = [float(corner[0]) for corner in corners]
    ys = [float(corner[1]) for corner in corners]
    return int(min(xs)), int(min(ys)), int(max(xs)), int(max(ys))


rows = []
for pts_file in sorted(glob.glob("./labels" + '/*.pts')):
    # "<stem>.pts" in ./labels is paired with "<stem>.jpg" in ./img.
    stem = os.path.splitext(os.path.basename(pts_file))[0]
    filename = stem + ".jpg"
    with Image.open("./img/" + filename) as im:
        width, height = im.size
    classs = "fish"
    # As in the original script, the box is read from the first line.
    with open(pts_file) as f:
        first_line = f.readline()
    xmin, ymin, xmax, ymax = bounding_box_extents(first_line)
    rows.append([filename, width, height, classs, xmin, ymin, xmax, ymax])

if rows:
    # The file is opened in append mode, so only write the header once.
    needs_header = (not os.path.exists(OUTPUT_CSV)
                    or os.path.getsize(OUTPUT_CSV) == 0)
    with open(OUTPUT_CSV, 'a', newline='') as f:
        writer = csv.writer(f)
        if needs_header:
            writer.writerow(CSV_HEADER)
        writer.writerows(rows)
print('Successfully converted pts to csv.')


How to tell `photutils` to plot only apertures which satisfy a condition?

I'm following an example in the photutils documentation to detect sources in an image:
from astropy.stats import sigma_clipped_stats
from photutils.datasets import load_star_image
import numpy as np
import matplotlib.pyplot as plt
from astropy.visualization import SqrtStretch
from astropy.visualization.mpl_normalize import ImageNormalize
from photutils.detection import DAOStarFinder
from photutils.aperture import CircularAperture
# Load image
hdu = load_star_image()  # load a star image from the dataset
data = hdu.data[0:101, 0:101]  # work on a 101 x 101 pixel cut-out
mean, median, std = sigma_clipped_stats(data, sigma=3.0)  # estimate noise

# Find stars in the image that have FWHMs of 3 pixels and peaks ~ 5 sigma > bg
daofind = DAOStarFinder(fwhm=3.0, threshold=5. * std)
sources = daofind(data - median)

# Print position and photometric data for each star in the image
for col in sources.colnames:
    sources[col].info.format = '%.8g'  # for consistent table output

# One circular aperture (radius 4 px) per detected source.
positions = np.transpose((sources['xcentroid'], sources['ycentroid']))
apertures = CircularAperture(positions, r=4.)

norm = ImageNormalize(stretch=SqrtStretch())
plt.imshow(data, cmap='Greys', origin='lower', norm=norm,
           interpolation='nearest')

# NOTE: kept as posted, because this loop is the subject of the question.
# `apertures` holds every source, so each matching row redraws all of the
# apertures. To draw only the selected stars, build the CircularAperture
# from the filtered positions instead.
for i in range(len(sources)):
    if sources[i][-1] < -2:  # last column of the source table
        apertures.plot(color='r', lw=1.5, alpha=0.5)
Which produces
I've added the last four lines, with the intention to plot apertures around only the brightest stars. However, the for loop doesn't change the image. I understand why (it's plotting all apertures multiple times, once for each of the 4 stars with mag < -2), but how do I change it to plot them for only those stars?

Images and masks visualisation - Colab

Hi everyone.
I have set up a visual check of images and masks in Colab using matplotlib, NumPy and random. I expected it to show the image and the mask with the same number, but unfortunately it does not.
For whatever reason, the images don't correspond. Both images have the same size and number/name.
Does anyone have some hints on how to fix this? Thank you in advance!
import random
import numpy as np
# random.randint includes BOTH end points, so randint(0, len(...)) can
# return len(...) and raise IndexError; randrange excludes the upper bound.
image_number = random.randrange(len(image_dataset))

# Use one axes per picture: two imshow calls on the same axes leave only
# the last one visible.
fig, (ax_image, ax_mask) = plt.subplots(1, 2, figsize=(12, 6))
ax_image.imshow(np.reshape(image_dataset[image_number], (patch_size, patch_size, 3)))
ax_image.set_title("image {0}".format(image_number))
ax_mask.imshow(np.reshape(mask_dataset[image_number], (patch_size, patch_size, 3)))
ax_mask.set_title("mask {0}".format(image_number))
plt.show()
image printed using the code above
This is how I'm importing the training images. (doing the same for masks)
# Walk root_directory and load every .png found in folders named "images".
images_dataset = []  # TRAIN IMAGES
for path, subdirs, files in os.walk(root_directory):
    dirname = path.split(os.path.sep)[-1]
    if dirname != "images":
        continue
    # Files are taken in the order os.listdir returns them (not sorted).
    images = os.listdir(path)
    for i, image_name in enumerate(images):
        if not image_name.endswith('.png'):
            continue
        # OpenCV reads BGR; convert to RGB before further processing.
        bgr = cv2.imread(path + "/" + image_name, 1)
        image = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        # Largest width/height that is a whole multiple of patch_size.
        rows, cols = image.shape[0], image.shape[1]
        SIZE_X = (cols // patch_size) * patch_size
        SIZE_Y = (rows // patch_size) * patch_size
        # Crop from the top-left corner to that size, back to an array.
        image = np.array(Image.fromarray(image).crop((0, 0, SIZE_X, SIZE_Y)))
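Try sorting the file list, so that images and masks are loaded in the same order (os.listdir returns entries in arbitrary order):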
images = sorted(os.listdir(path))

Tensorflow lite only using the first item in the labelmap.txt file when identifying items

I have installed tensorflow 1.15 and created a custom model. I converted it into a .tflite file so tensorflow lite can read it. Then I ran the following code:
import os
import argparse
import cv2
import numpy as np
import sys
import glob
import importlib.util
# Define and parse the command-line arguments.
parser = argparse.ArgumentParser()
parser.add_argument('--modeldir', help='Folder the .tflite file is located in', required=True)
parser.add_argument('--graph', help='Name of the .tflite file, if different than detect.tflite', default='detect.tflite')
parser.add_argument('--labels', help='Name of the labelmap file, if different than labelmap.txt', default='labelmap.txt')
parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects', default=0.5)
parser.add_argument('--image', help='Name of the single image to perform detection on. To run detection on multiple images, use --imagedir', default=None)
parser.add_argument('--imagedir', help='Name of the folder containing images to perform detection on. Folder must contain only images.', default=None)
parser.add_argument('--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection', action='store_true')
args = parser.parse_args()

MODEL_NAME = args.modeldir
GRAPH_NAME = args.graph
LABELMAP_NAME = args.labels
min_conf_threshold = float(args.threshold)
use_TPU = args.edgetpu
IM_NAME = args.image
IM_DIR = args.imagedir

# --image and --imagedir are mutually exclusive: report the error AND stop.
if IM_NAME and IM_DIR:
    print('Error! Please only use the --image argument or the --imagedir argument, not both. Issue "python -h" for help.')
    sys.exit(1)

# If neither is given, fall back to a default image name.
if not IM_NAME and not IM_DIR:
    IM_NAME = 'test1.jpg'

# Prefer the lightweight tflite_runtime package; otherwise use the
# interpreter bundled with full TensorFlow.
pkg = importlib.util.find_spec('tflite_runtime')
if pkg:
    from tflite_runtime.interpreter import Interpreter
    if use_TPU:
        from tflite_runtime.interpreter import load_delegate
else:
    from tensorflow.lite.python.interpreter import Interpreter
    if use_TPU:
        from tensorflow.lite.python.interpreter import load_delegate

# With the Edge TPU, switch the default model file name to the TPU build.
if use_TPU:
    if GRAPH_NAME == 'detect.tflite':
        GRAPH_NAME = 'edgetpu.tflite'

CWD_PATH = os.getcwd()

# Build the list of images to run detection on.
if IM_DIR:
    PATH_TO_IMAGES = os.path.join(CWD_PATH, IM_DIR)
    images = glob.glob(PATH_TO_IMAGES + '/*')
elif IM_NAME:
    PATH_TO_IMAGES = os.path.join(CWD_PATH, IM_NAME)
    images = glob.glob(PATH_TO_IMAGES)

# Model file and label map both live in the model folder.
PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)
PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

with open(PATH_TO_LABELS, 'r') as f:
    labels = [line.strip() for line in f.readlines()]

# Some label maps start with a '???' placeholder entry; drop it so that
# class index 0 maps to the first real label.
if labels and labels[0] == '???':
    del labels[0]

if use_TPU:
    # NOTE(review): 'libedgetpu.so.1.0' is the Linux library name; the name
    # differs on macOS/Windows -- confirm for your platform.
    interpreter = Interpreter(model_path=PATH_TO_CKPT,
                              experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
else:
    interpreter = Interpreter(model_path=PATH_TO_CKPT)

# Tensors must be allocated before reading details or running inference.
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]

floating_model = (input_details[0]['dtype'] == np.float32)

# Normalisation constants used for float (non-quantized) models.
input_mean = 127.5
input_std = 127.5

for image_path in images:
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for unreadable / non-image files.
        print('Could not read image: {0}'.format(image_path))
        continue
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    imH, imW, _ = image.shape
    image_resized = cv2.resize(image_rgb, (width, height))
    input_data = np.expand_dims(image_resized, axis=0)

    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std

    # Run the model on this image.
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()

    # NOTE(review): output indices 0/1/2 are taken to be boxes/classes/scores
    # as in the original script -- confirm against output_details for your
    # model, since the order is model-dependent.
    boxes = interpreter.get_tensor(output_details[0]['index'])[0]  # Bounding box coordinates of detected objects
    classes = interpreter.get_tensor(output_details[1]['index'])[0]  # Class index of detected objects
    scores = interpreter.get_tensor(output_details[2]['index'])[0]  # Confidence of detected objects

    for i in range(len(scores)):
        if (scores[i] > min_conf_threshold) and (scores[i] <= 1.0):
            # Boxes are normalised [ymin, xmin, ymax, xmax]; scale to pixels
            # and clamp to the image.
            ymin = int(max(1, (boxes[i][0] * imH)))
            xmin = int(max(1, (boxes[i][1] * imW)))
            ymax = int(min(imH, (boxes[i][2] * imH)))
            xmax = int(min(imW, (boxes[i][3] * imW)))

            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)

            object_name = labels[int(classes[i])]  # Look up object name from "labels" array using class index
            label = '%s: %d%%' % (object_name, int(scores[i] * 100))  # Example: 'person: 72%'
            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)  # Get font size
            label_ymin = max(ymin, labelSize[1] + 10)  # Make sure not to draw label too close to top of window
            cv2.rectangle(image, (xmin, label_ymin - labelSize[1] - 10), (xmin + labelSize[0], label_ymin + baseLine - 10), (255, 255, 255), cv2.FILLED)  # Draw white box to put label text in
            cv2.putText(image, label, (xmin, label_ymin - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)

    cv2.imshow('Object detector', image)

    # Any key shows the next image; 'q' stops early.
    if cv2.waitKey(0) == ord('q'):
        break

cv2.destroyAllWindows()
Now, my custom model seems to work. It located the items on the image correctly but it labels everything with the first item on the labelmap.txt. For example:
The model identifies the remotes in the images but labels them as "key" because it is the first thing in the labelmap.txt. I don't know why this is happening, can someone please help me. I am sorry if anything is unclear. Please let me know and I will try my best to clarify a little better. Thank you.
I followed the

How to Urlretrieve and crop image based on data from CSV file?

I have a CSV file with URLs and box coordinates (x coordinate of the top-left corner, y coordinate of the top-left corner, x coordinate of the bottom-right corner and y coordinate of the bottom-right corner). I would like to fetch each image, crop it based on the coordinates, resize it to 256x256 and then save it. Unfortunately, downloading the whole database first and then creating a separate set of cropped images is difficult due to the size of the database. Therefore, it is necessary to create the image database with cropped images from the beginning. Another way is to save the image, then crop it and overwrite the initial image (and then iterate to the next one with i += 1).
Would the current approach work, or should I use a different method? Additionally, how would I save the acquired images to a specified folder, as currently they are downloaded to the same folder as the script?
import urllib.request
import csv
import numpy as np
import pandas as pd
from io import BytesIO
import requests
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
# First attempt from the question, kept exactly as posted.
# NOTE(review): several statements below are incomplete in this copy and
# the indentation is missing; see the notes on the affected lines.
# `filename` is only passed to str.format on a string that has no
# placeholders, so it does not influence the path that is opened.
filename = "images"
# open file to read
with open("data_test.csv".format(filename), 'r') as csvfile:
reader = csv.reader(csvfile)
# pop header row (1st row in csv)
header = next(reader)
# iterate on all lines
# The remaining lines are read straight from the file object and split on
# commas by hand; `i` counts processed lines.
i = 0
for line in csvfile:
splitted_line = line.split(',')
# check if we have an image URL
# Column 1 is used as the URL; column 0 is used in the status messages.
if splitted_line[1] != '' and splitted_line[1] != "\n":
response = requests.get(splitted_line[1])
# NOTE(review): the right-hand side is missing here; presumably the image
# is built from `response` -- confirm against the original post.
img =
#crop_img = img[splitted_line[2]:splitted_line[3], splitted_line[4]:splitted_line[5]]
#crop_img = img[315:105, 370:173] + ".png")
#crop_img = img[105:105+173,315:315+370]
#[y: y + h, x: x + w]
# NOTE(review): the trailing `+ ".png")` looks like the remainder of a lost
# save call fused onto the resize line -- confirm.
new_img = img.resize((256, 256)) + ".png")
imgplot = plt.imshow(img)
# urllib.request.urlopen(splitted_line[1])
print("Image saved for {0}".format(splitted_line[0]))
# img = cv2.imread(img_path, 0)
i += 1
# NOTE(review): presumably the branch taken when no URL is present; the
# branching keyword is not visible in this copy.
print("No result for {0}".format(splitted_line[0]))
Any further recommendations are welcome.
Edit: The latest version gives me error :
crop_img = img[105:105+173,315:315+370]
TypeError: 'JpegImageFile' object is not subscriptable
I solved the problem using io.BytesIO and some cropping/resizing techniques.
import csv
from io import BytesIO
import requests
from PIL import Image
import matplotlib.pyplot as plt
from pathlib import Path

# Folder the cropped images are written to (created if missing).
# NOTE(review): the original only passed this name to a no-op str.format
# call; using it as the output folder is an assumption -- confirm.
filename = "images"
output_dir = Path(filename)
output_dir.mkdir(parents=True, exist_ok=True)

# open file to read
with open("data_test.csv", 'r', newline='') as csvfile:
    reader = csv.reader(csvfile)
    # pop header row (1st row in csv); tolerate an empty file
    header = next(reader, None)
    # iterate on all rows; `i` numbers the images that were saved
    i = 0
    for row in reader:
        if not row:
            continue  # blank line
        # check if we have an image URL (column 1)
        url = row[1].strip() if len(row) > 1 else ''
        if not url:
            print("No result for {0}".format(row[0]))
            continue
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            img = Image.open(BytesIO(response.content))
        except (requests.RequestException, OSError):
            # Download failed or the payload is not a readable image.
            print("No result for {0}".format(row[0]))
            continue
        # im.crop(box): 4-tuple with the left, upper, right and lower
        # pixel coordinates (columns 2-5)
        left_x, top_y, right_x, bottom_y = (int(value) for value in row[2:6])
        crop = img.crop((left_x, top_y, right_x, bottom_y))
        new_img = crop.resize((256, 256))
        # preview new images
        plt.imshow(new_img)
        new_img.save(output_dir / (str(i) + ".png"))
        print("Image saved for {0}".format(row[0]))
        i += 1
Hope it will help someone. Any optimization recommendations are still welcome.

matplotlib contour plot geojson output?

I'm using python matplotlib to generate contour plots from an 2D array of temperature data (stored in a NetCDF file), and I am interested in exporting the contour polygons and/or lines into geojson format so that I can use them outside of matplotlib. I have figured out that the "pyplot.contourf" function returns a "QuadContourSet" object which has a "collections" attribute that contains the coordinates of the contours:
# Draw filled contours of `data` at the given `levels`; contourf returns
# the contour set object.
contourSet = plt.contourf(data, levels)
# The `collections` attribute holds the contour coordinates.
# NOTE(review): newer Matplotlib releases deprecate this attribute --
# confirm against the installed version.
collections = contourSet.collections
Does anyone know if matplotlib has a way to export the coordinates in "collections" to various formats, in particular geojson? I've searched the matplotlib documentation, and the web, and haven't come up with anything obvious.
geojsoncontour is a Python module that converts matplotlib contour lines to geojson.
It uses the following, simplified but complete, method to convert a matplotlib contour to geojson:
import numpy
from matplotlib.colors import rgb2hex
import matplotlib.pyplot as plt
from geojson import Feature, LineString, FeatureCollection
import geojson  # needed for geojson.dumps below

# Build a 1-degree lon/lat grid and a sample field (distance from 0, 0).
grid_size = 1.0
latrange = numpy.arange(-90.0, 90.0, grid_size)
lonrange = numpy.arange(-180.0, 180.0, grid_size)
X, Y = numpy.meshgrid(lonrange, latrange)
Z = numpy.sqrt(X * X + Y * Y)

figure = plt.figure()
ax = figure.add_subplot(111)
contour = ax.contour(lonrange, latrange, Z,
                     levels=numpy.linspace(start=0, stop=100, num=10))

# Convert every contour path into a GeoJSON LineString feature.
# NOTE(review): `contour.collections` is deprecated in newer Matplotlib
# releases -- confirm against the installed version.
line_features = []
for collection in contour.collections:
    paths = collection.get_paths()
    color = collection.get_edgecolor()
    for path in paths:
        v = path.vertices
        if len(v) < 2:
            continue  # a LineString needs at least two positions
        # x was plotted from lonrange and y from latrange, so each vertex
        # is (lon, lat) -- the position order GeoJSON uses.
        coordinates = [(float(lon), float(lat)) for lon, lat in v]
        line = LineString(coordinates)
        properties = {
            "stroke-width": 3,
            "stroke": rgb2hex(color[0]),
        }
        line_features.append(Feature(geometry=line, properties=properties))

feature_collection = FeatureCollection(line_features)
geojson_dump = geojson.dumps(feature_collection, sort_keys=True)
with open('out.geojson', 'w') as fileout:
    fileout.write(geojson_dump)
A good start to be sure to export all contours is to use the get_paths method when you iterate over the Collection objects and then the to_polygons method of Path to get numpy arrays:
Nevertheless the final formatting is up to you.
import matplotlib.pyplot as plt
# Filled contours of `data` at the given `levels`.
cs = plt.contourf(data, levels)
# Walk every collection and every path in it, and print each polygon that
# Path.to_polygons() returns together with its class.
for collection in cs.collections:
    for path in collection.get_paths():
        for polygon in path.to_polygons():
            # print() with a single argument works on Python 2 and 3.
            print(polygon.__class__)
            print(polygon)