Trying to generate TFrecord but I,m getting this error "FileNotFoundError: [Errno 2] No such file or directory" - object-detection

I'm following this link https://colab.research.google.com/drive/11ko0DBnI1QLxVoJQR8gt9b4JDcvbCrtU#scrollTo=A_tyvKnBP6qD to build my object detector. I am using Google collab. My workspace structure is exactly as followed in this link. Everything was going fine until this block of code:
from object_detection.utils import dataset_util
%cd /content/drive/MyDrive/Gun_Detection/models
data_base_url = '/content/drive/MyDrive/Gun_Detection/data'
image_dir = data_base_url + 'images/'
def class_text_to_int(row_label):
if row_label == 'pistol':
return 1
else:
None
def split(df, group):
data = namedtuple('data', ['filename', 'object']) #we wanna group by
gb = df.groupby(group) #split data into group data by splitting, applying n combining
return [data(filename, gb.get_group(x))
for filename, x in zip(gb.groups.keys(), gb.groups)] #add group keys to index to identify pieces.
def create_tf_example(group, path):
with tf.io.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
encoded_jpg = fid.read()
encoded_jpg_io = io.BytesIO(encoded_jpg)
image = Image.open(encoded_jpg_io)
width, height = image.size
filename = group.filename.encode('utf8')
image_format = b'jpg'
xmins = []
xmaxs = []
ymins = []
ymaxs = []
classes_text = []
classes = []
for index, row in group.object.iterrows():
xmins.append(row['xmin'] / width)
xmaxs.append(row['xmax'] / width)
ymins.append(row['ymin'] / height)
ymaxs.append(row['ymax'] / height)
classes_text.append(row['class'].encode('utf8'))
classes.append(class_text_to_int(row['class']))
tf_example = tf.train.Example(features=tf.train.Features(feature={
'image/height': dataset_util.int64_feature(height),
'image/width': dataset_util.int64_feature(width),
'image/filename': dataset_util.bytes_feature(filename),
'image/source_id': dataset_util.bytes_feature(filename),
'image/encoded': dataset_util.bytes_feature(encoded_jpg),
'image/format': dataset_util.bytes_feature(image_format),
'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
'image/object/class/label': dataset_util.int64_list_feature(classes),
}))
return tf_example
for csv in ['train_labels', 'test_labels']:
writer = tf.io.TFRecordWriter(data_base_url + csv + '.record')
path = os.path.join(image_dir)
examples = pd.read_csv(data_base_url + csv + '.csv')
grouped = split(examples, 'filename')
for group in grouped:
tf_example = create_tf_example(group, path)
writer.write(tf_example.SerializeToString())
writer.close()
output_path = os.path.join(os.getcwd(), data_base_url + csv + '.record')
print('Successfully created the TFRecords: {}'.format(data_base_url + csv + '.record'))
After this error, datatrain_label.record got generated in my Gun Detection folder in my drive. I am confused 😕 I can't proceed further. Please help!
N.B: I am not pro with python and am still learning. Trying hard to understand the code but I honestly don't.

Provide the data_base_url path as '/content/drive/MyDrive/Gun_Detection/data/' you are missing a / at the end of it due to which you code can not find the image_dir.
Use os.path.join() function in order to avoid such case.

Related

Tensorflow TFWriter incorrect data serialization

I have a dataset created using ImageLabeller from MatLab, when trying to translate the dataset to TFrecord, according to the instructions in here, some of the coordinates are incorrect, it appears as if the min is greater than the max.
I have tried removing the examples that fail but it seems the error is not related to that, failed examples always appear in the same position. I have tried with images from the MODD2 and from a dataset created with the imageLabeller using larger images and it works correctly.
The code used for the generation of the TFrecord files is the following:
# MODD2 format: x y w h -> x,y are the top left corner coordinates
def read_drone_mat_file(file_number):
# navigate to the modd2 directory
bbox_d = []
bbox_o = []
filename = []
# for each file, load it into data and append the obstacles information into the bbox list
mat = os.listdir(drones_dir)[file_number]
frame = os.path.join(drones_dir, mat)
data = sio.loadmat(frame)
for obj in data['drone']:
bbox_d.append(obj)
for obj in data['obstacles']:
bbox_o.append(obj)
filename.append(mat[0:9])
return bbox_d, bbox_o, filename
# %% Helper function to create a tfexample for the drone data
def create_drone_tfexample(drones, obstacles, index, image_path):
image_format = b'jpg'
filename = os.listdir(image_path)[index+2]
# load corresponding image (only use left images)
with tf.io.gfile.GFile(os.path.join(image_path, filename), 'rb') as fid:
encoded_jpg = fid.read()
encoded_jpg_io = io.BytesIO(encoded_jpg)
image = Image.open(encoded_jpg_io)
width, height = image.size
wsize, hsize = (width, height)
#basewidth = 640
# if width > basewidth:
# wpercent = (basewidth/float(image.size[0]))
# hsize = int((float(image.size[1])*float(wpercent)))
# wsize = basewidth
# image = image.resize((basewidth,hsize), Image.ANTIALIAS)
# buffered = io.BytesIO()
# image.save(buffered, format="JPEG")
# encoded_jpg = buffered.getvalue()
filename = os.path.splitext(filename)[0].encode('utf-8')
create_drone_tfexample.source_id += 1
source_id_s = "{}".format(create_drone_tfexample.source_id).encode('utf-8')
# tfrecord features definition
xmins = []
xmaxs = []
ymins = []
ymaxs = []
classes_text = []
classes = []
# for each image
for obj in drones:
xmins.append(obj[0] / width)
xmaxs.append((obj[0]+obj[2]) / width)
ymins.append(obj[1] / height)
ymaxs.append((obj[1]+obj[3]) / height)
# until the drone dataset is available, all obstacles are class 0
classes_text.append(bytes('drone', 'utf-8'))
classes.append(2)
for obj in obstacles:
xmins.append(obj[0] / width)
xmaxs.append((obj[0]+obj[2]) / width)
ymins.append(obj[1] / height)
ymaxs.append((obj[1]+obj[3]) / height)
# until the drone dataset is available, all obstacles are class 0
classes_text.append(bytes('obstacles', 'utf-8'))
classes.append(1)
print(source_id_s+b": "+filename)
# print("xmins: {}".format(xmins))
# print("xmaxs: {}".format(xmaxs))
# print("ymins: {}".format(ymins))
# print("ymaxs: {}".format(ymaxs))
# create tf_example
tf_example = tf.train.Example(features=tf.train.Features(feature={
'image/height': dataset_util.int64_feature(hsize),
'image/width': dataset_util.int64_feature(wsize),
'image/filename': dataset_util.bytes_feature(filename),
'image/source_id': dataset_util.bytes_feature(source_id_s),
'image/encoded': dataset_util.bytes_feature(encoded_jpg),
'image/format': dataset_util.bytes_feature(image_format),
'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
'image/object/class/label': dataset_util.int64_list_feature(classes),
}))
return tf_example
create_drone_tfexample.source_id = 0
# %% Create final dataset WARNING: Slow and destructive
train_writer = tf.io.TFRecordWriter(
output_dir+'drone_train_truncated.tfrecord')
test_writer = tf.io.TFRecordWriter(output_dir+'drone_test_truncated.tfrecord')
drone_test_writer = tf.io.TFRecordWriter(
output_dir + 'drone_only_test.tfrecord')
create_drone_tfexample.source_id = 0
# Drones dataset
for index, mat in enumerate(os.listdir(drones_dir)):
boxes_d, boxes_o, filename = read_drone_mat_file(index)
print()
# Pass the bounding boxes to the create_tfexample function
if index < 210:
image_path = drones_image_root
tf_example = create_drone_tfexample(
boxes_d, boxes_o, index, image_path)
# Write the tf_example into the dataset
if random.randint(1, 100) <= 80: # 80% Train 20% Validation
train_writer.write(tf_example.SerializeToString())
else:
test_writer.write(tf_example.SerializeToString())
drone_test_writer.write(tf_example.SerializeToString())
The examples fail when trying to use them for training, to read the examples, I use the following code:
# %% Extract images from dataset
dataset_file = "drone_only_test.tfrecord"
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
raw_dataset = tf.data.TFRecordDataset(
"<path_to_dataset>"+dataset_file)
print('_______________________________________________________________________________________')
image_feature_description = {
# 'image/height': dataset_util.int64_feature(hsize),
# 'image/width': dataset_util.int64_feature(512),
# 'image/filename': dataset_util.bytes_feature(filename),
# 'image/source_id': dataset_util.bytes_feature(filename),
# 'image/encoded': dataset_util.bytes_feature(encoded_jpg),
# 'image/format': dataset_util.bytes_feature(image_format),
# 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
# 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
# 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
# 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
# 'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
# 'image/object/class/label': dataset_util.int64_list_feature(classes),
'image/height': tf.io.FixedLenFeature([], tf.int64),
'image/width': tf.io.FixedLenFeature([], tf.int64),
'image/filename': tf.io.FixedLenFeature([], tf.string),
'image/source_id': tf.io.FixedLenFeature([], tf.string),
'image/encoded': tf.io.FixedLenFeature([], tf.string),
'image/format': tf.io.FixedLenFeature([], tf.string),
'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32),
'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32),
'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32),
'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32),
'image/object/class/text': tf.io.VarLenFeature(tf.string),
'image/object/class/label': tf.io.VarLenFeature(tf.int64),
}
def _parse_image_function(example_proto):
# Parse the input tf.train.Example proto using the dictionary above.
return tf.io.parse_single_example(example_proto, image_feature_description)
parsed_image_dataset = raw_dataset.map(_parse_image_function)
for image_features in parsed_image_dataset.take(10):
image_raw = image_features['image/encoded'].numpy()
display.display(display.Image(data=image_raw))
encoded_jpg_io = io.BytesIO(image_raw)
image = Image.open(encoded_jpg_io)
image.save("out.jpg", format="JPEG")
print(f'ID: {image_features["image/filename"]}')
print(f'XMIN: {image_features["image/object/bbox/xmin"].values*640}')
print(f'XMAX: {image_features["image/object/bbox/xmax"].values*640}')
print(f'YMIN: {image_features["image/object/bbox/ymin"].values*480}')
print(f'YMAX: {image_features["image/object/bbox/ymax"].values*480}')
print('---------------------')
print(
f'WIDTH: {image_features["image/object/bbox/xmax"].values*640 - image_features["image/object/bbox/xmin"].values*640}')
print(
f'HEIGHT: {image_features["image/object/bbox/ymax"].values*480 - image_features["image/object/bbox/ymin"].values*480}')
For the example in the fourth position, the output is the following:
ID: b'color_00000036'
XMIN: [179. 175. 5.]
XMAX: [387. 210. 21.]
YMIN: [263. 193. 242.]
YMAX: [372. 6. 248.]
---------------------
WIDTH: [208. 35. 16.]
HEIGHT: [ 109. -187. 6.]
The matlab output for the same image is the following:
ground_truth =
179 175 5
263 193 242
208 35 16
109 69 6
The versions used are the following:
Windows 10 64-bit
Python 3.7.9 64-bit
Tensorflow 2.4.0
Scipy 1.5.4
Tensorflow Object Detection API master
The issue ended up being related to scipy.io.loadmat() casting the data into np.uint8, the solution is to pass mat_dtype=True as a parameter so it loads everything as np.float64.
Not the most efficient thing but it works.
Thank you very much.

I need help.. UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc1 in position 0: invalid start byte

from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
import os
import io
import pandas as pd
import tensorflow as tf
from PIL import Image
from object_detection.utils import dataset_util
from collections import namedtuple, OrderedDict
flags = tf.compat.v1.app.flags
flags.DEFINE_string('csv_input', '', 'Path to the CSV input')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
flags.DEFINE_string('image_dir', '', 'Path to images')
FLAGS = flags.FLAGS
# replace row_label with the name you annotated your images as
def class_text_to_int(row_label):
if row_label == 'Masked':
return 1
elif row_label == 'No_Masked':
return 2
else :
None
def split(df, group):
data = namedtuple('data', ['filename', 'object'])
gb = df.groupby(group)
return [data(filename, gb.get_group(x)) for filename, x in zip(gb.groups.keys(), gb.groups)]
def create_tf_example(group, path):
with tf.io.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
encoded_jpg = fid.read()
encoded_jpg_io = io.BytesIO(encoded_jpg)
image = Image.open(encoded_jpg_io)
width, height = image.size
filename = group.filename.encode('utf8')
image_format = b'jpg'
xmins = []
xmaxs = []
ymins = []
ymaxs = []
classes_text = []
classes = []
for index, row in group.object.iterrows():
xmins.append(row['xmin'] / width)
xmaxs.append(row['xmax'] / width)
ymins.append(row['ymin'] / height)
ymaxs.append(row['ymax'] / height)
classes_text.append(row['class'].encode('utf8'))
classes.append(class_text_to_int(row['class']))
tf_example = tf.train.Example(features=tf.train.Features(feature={
'image/height': dataset_util.int64_feature(height),
'image/width': dataset_util.int64_feature(width),
'image/filename': dataset_util.bytes_feature(filename),
'image/source_id': dataset_util.bytes_feature(filename),
'image/encoded': dataset_util.bytes_feature(encoded_jpg),
'image/format': dataset_util.bytes_feature(image_format),
'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
'image/object/class/label': dataset_util.int64_list_feature(classes),
}))
return tf_example
def main(_):
writer = tf.io.TFRecordWriter(FLAGS.output_path)
path = os.path.join(FLAGS.image_dir)
examples = pd.read_csv(FLAGS.csv_input)
grouped = split(examples, 'filename')
for group in grouped:
tf_example = create_tf_example(group, path)
writer.write(tf_example.SerializeToString())
writer.close()
output_path = os.path.join(os.getcwd(), FLAGS.output_path)
print('Successfully created the TFRecords: {}'.format(output_path))
if __name__ == '__main__':
tf.compat.v1.app.run()
this is my code named generate_tfrecord.py.
I downloaded this code from github as my first tensorflow tfrecord making example, but it makes error.
I am Korean, and I found that this error occurs when my computer name is korean.
But when I typed 'hostname' in my cmd, it returned 'DESKTOP-7AU~~~', which does not include Korean letters.
If you make comment about your required code or information, I will try to give it to you.
in my images - all folder, there are 764 sets of img+xml file and I have already run "xml_to_csv.py"
this code is from https://github.com/Bengemon825/TF_Object_Detection2020
this simplest way: you can rename your hostname with ascii characters.
your can search the question that how to rename the hostname by google.
this problem caused by Python read an non-unicode characters and cannot decode by utf-8.
I had a very similar problem and here is how I solved it - took me many hours to figure out:
If you are a Mac user, MacOS has 'invisible' folder organizing files of the format .DS_Store in every folder. When iterating through your images folder, the code runs into these .DS_Store files which the utf-8 decoder cannot decode. Deleting them is totally harmless although they in fact do re-appear but you don't have to worry about that
So you can get rid of them like this
OR (I preferred this option once figured out the problem): In your code, you can explicitly by-pass them with an if statement that checks only for .xml files or .csv files or .txt whichever files you are working with in your images folder/directory. So something like:
path = 'path to folder containing your .xml files or .csv files or .txt files'
if '.xml' in str(path):
I have also realized that when people directly use this generate_tfrecord.py as is, many tend to forget to explicitly call out their file paths correctly. This also happens for people using the create_pascal_tf_record.py python script of the object_detection api for TensorFlow.
For example, from your code above, flags.DEFINE_string('csv_input', '', 'Path to the CSV input'), you need to fill in the ' ' with your csv directory path and not leave it empty. For example flags.DEFINE_string('csv_input', 'add your csv directory path here', 'Path to the CSV input'). You have to do the same for all the flags.DEFINE_string instances or else you must explicitly spell out the path if you don't want to use the flags.DEFINE_string instances
I hope this is helpful to anyone using a Mac and running into all sorts of UnicodeDecodeError for TFRECORD files. I'm not sure if Windows users run into something similar. Also there could be other reasons but for me this happened to be the cause

Tensorflow Object-Detection Fine-Tuning leads to incorrect accuracy values

I am working with the Tensorflow Object-Detection API and want to use a pre-trained Faster R-CNN Resnet101 model on Kitti image data and fine-tune it on Cityscapes image data. I downloaded the pre-trained model here.
This script creates the tfrecord files. I use this script to create tfrecord files from Cityscape (CS) images.
The CS tf_records are afterward used in order to fine-tune the pre-trained Resnet model. For this task, I use this
python3.5 model_main.py --pipeline_config_path={Path to config file in ../samples/configs/} --model_dir={Output directory} --num_train_steps={Train Steps} --sample_1_of_n_eval_examples=1 --alsologtostderr
Using only CS Training and Validation data lead to an COCO accuracy of -1.000
Average Precision (AP) #[ IoU=0.5:0.95 | area=all | maxDets=100 ] = -1.000
....
I tried different things:
Train on CS data and validate on Kitti data. This lead to an COCO accuracy that is not -1.000 but very low. Between 0.01 and 1.5% (after 10.000 training steps)
Looked at Tensorboard visualizations. The loss falls from 0.05 to 0.01 over the first 1.500 iterations and stays over the last 8.500 iterations around 2.5e-4 and does not change much. (I would upload an image if I would know how..)
Fine-tuned the pre-trained model with manipulated Kitti data. I changed the content of the tfrecord files that create the Kitti tfrecord files. By this, I mean I deleted all of the useless variables (like 3D Annotations and so on) in the tfrecord data in order to have similar content to the CS tfrecords I created (code below). Using these manipulated Kitti data also lead to a validation accuracy that seems to be normal (around 70-80%). Therefore, I expect that this error is not caused by an missing attribute in the tfrecords.
An inference of the CS data on the pre-trained Resnet model leads to an accuracy around 20% and this is what I expect. Kitti inference leads to an accuracy around 85%.
Using CS tfrecords with the following content per image:
tf_example = tf.train.Example(features=tf.train.Features(feature={
'image/height': dataset_util.int64_feature(height),
'image/width': dataset_util.int64_feature(width),
'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')),
'image/encoded': dataset_util.bytes_feature(encoded_image_data),
'image/format': dataset_util.bytes_feature(image_format.encode('utf8')),
'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
'image/object/class/label': dataset_util.int64_list_feature(classes),
}))
return tf_example
Using this code to encode an image
with tf.gfile.GFile(os.path.join(image_path, '{}'.format(currentImageName)), 'rb') as fid:
encoded_image_data = fid.read()
encoded_image_io = io.BytesIO(encoded_image_data)
Could the coding of the data be the reason? Or what could be another source for an error? As mentioned, I tried several things and none of them worked as expected. Fine-tuning should not be that hard or do I miss any point?
As mentioned in point 4, I tested the inference and the tf_record files and therefore, I expect that it is possible to fine-tune the model.
In general, I expect that the accuracy is not close to 0% after 10.000 iterations.
Everything looks a bit strange and I do not know what the error is. Therefore, I would appreciate each hint/remark/solution for this issue.
EDIT:
def create_tf_example(currentName, anno_path, image_path):
currentNameSplit = currentName.split('.')[0]
currentImageName = currentNameSplit + '.png'
with tf.gfile.GFile(os.path.join(image_path, '{}'.format(currentImageName)), 'rb') as fid:
encoded_image_data = fid.read()
encoded_image_io = io.BytesIO(encoded_image_data)
image = Image.open(encoded_image_io)
image = np.asarray(image)
width = int(image.shape[1])
height = int(image.shape[0])
filename = os.path.join(image_path, '{}'.format(currentImageName))
image_format = 'png' # b'jpeg' or b'png'
with open(anno_path + currentName) as file:
lines = file.readlines()
xmins = [] # List of normalized left x coordinates in bounding box (1 per box)
xmaxs = [] # List of normalized right x coordinates in bounding box
# (1 per box)
ymins = [] # List of normalized top y coordinates in bounding box (1 per box)
ymaxs = [] # List of normalized bottom y coordinates in bounding box
# (1 per box)
classes_text = [] # List of string class name of bounding box (1 per box)
classes = [] # List of integer class id of bounding box (1 per box)
for li in range(len(lines)):
print('Lines[li]: {}'.format(lines[li]))
xmins.append(float(lines[li].split()[0]) / width)
xmaxs.append(float(lines[li].split()[2]) / width)
ymins.append(float(lines[li].split()[1]) / height)
ymaxs.append(float(lines[li].split()[3]) / height)
classID = lines[li].split()[4]
if int(classID) == 0:
className = 'Car'
classes_text.append(className.encode('utf8'))
classID = 0
classes.append(classID+1) # add 1 because class 0 is always reserved for 'background'
elif int(classID) == 1:
className = 'Person'
classes_text.append(className.encode('utf8'))
classID = 1
classes.append(classID+1)
else:
print('Error with Image Annotations in {}'. format(currentName))
difficult_obj = [0] * len(xmins)
tf_example = tf.train.Example(features=tf.train.Features(feature={
'image/height': dataset_util.int64_feature(height),
'image/width': dataset_util.int64_feature(width),
'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')),
'image/encoded': dataset_util.bytes_feature(encoded_image_data),
'image/format': dataset_util.bytes_feature(image_format.encode('utf8')),
'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
'image/object/class/label': dataset_util.int64_list_feature(classes),
}))
return tf_example
def main(_):
writer_training = tf.python_io.TFRecordWriter(FLAGS.output_path_Training)
writer_valid = tf.python_io.TFRecordWriter(FLAGS.output_path_Test)
writer_test = tf.python_io.TFRecordWriter(FLAGS.output_path_Valid)
allAnnotationFiles = []
os.chdir(FLAGS.anno_path)
for file in sorted(glob.glob("*.{}".format('txt'))):
allAnnotationFiles.append(file)
counter=0
for currentName in allAnnotationFiles:
if counter < 2411:
tf_example = create_tf_example(currentName, FLAGS.anno_path, FLAGS.image_path)
writer_training.write(tf_example.SerializeToString())
counter += 1
elif counter > 2411 and counter < 2972:
tf_example = create_tf_example(currentName, FLAGS.anno_path, FLAGS.image_path)
writer_valid.write(tf_example.SerializeToString())
counter += 1
elif counter <= 3475:
tf_example = create_tf_example(currentName, FLAGS.anno_path, FLAGS.image_path)
writer_test.write(tf_example.SerializeToString())
counter += 1
writer_training.close()
writer_test.close()
writer_valid.close()
if __name__ == '__main__':
tf.app.run()

Creating TFRecord file causes UnicodeDecodeError when being read

I'm following the directions to create a TFRecord file in this example for object detection:
https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md
I have created a Jupyter Notebook with Python 3.6.4 and TensorFlow 1.6.0 with these instructions.
I changed the value assignment inside create_tf_example to enter the correct information from my example (which is a PIL Image):
def create_tf_example(example):
height = example.height
width = example.width
filename = tf.compat.as_bytes(example.filename)
# convert Image to bytes for TF
imgByteArr = io.BytesIO()
example.save(imgByteArr, format='PNG')
imgByteArr = imgByteArr.getvalue()
encoded_image_data = tf.compat.as_bytes(imgByteArr, encoding='utf-8') # Encoded image bytes
image_format = b'png'
xmins = [0]
xmaxs = [width]
ymins = [0]
ymaxs = [height]
classes_text = [b'Test']
classes = [1]
tf_example = tf.train.Example(features=tf.train.Features(feature={
'image/height': dataset_util.int64_feature(height),
'image/width': dataset_util.int64_feature(width),
'image/filename': dataset_util.bytes_feature(filename),
'image/source_id': dataset_util.bytes_feature(filename),
'image/encoded': dataset_util.bytes_feature(encoded_image_data),
'image/format': dataset_util.bytes_feature(image_format),
'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
'image/object/class/label': dataset_util.int64_list_feature(classes),
}))
return tf_example
However, it will create the file without issue and when I try to read it again, I'm getting an error. This is the same error when I try to read the TFRecord file later with TensorFlow (label_map_util.load_labelmap(PATH_TO_LABELS)):
open('data/tfrecord/label_map.pbtxt').read()
---------------------------------------------------------------------------
UnicodeDecodeError Traceback (most recent call last)
<ipython-input-26-dfa57505da97> in <module>()
----> 1 open('data/tfrecord/label_map.pbtxt').read()
~/Documents/.../bin/../lib/python3.6/codecs.py in decode(self, input, final)
319 # decode input (taking the buffer into account)
320 data = self.buffer + input
--> 321 (result, consumed) = self._buffer_decode(data, self.errors, final)
322 # keep undecoded input until the next call
323 self.buffer = data[consumed:]
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xbe in position 10: invalid start byte
It isn't clear to me what I should be doing differently because of tf_example.SerializeToString() seems to be doing the encoding to a string in the example.
Here is the output if it helps from open('data/tfrecord/label_map.pbtxt', 'rb').read():
b'kX\x00\x00\x00\x00\x00\x00\x05#\xbe\xe0\n\xe7\xb0\x01\n\x15\n\x0bimage/width\x12\x06\x1a\x04\n\x02\x98\x03\n\x17\n\x0cimage/format\x12\x07\n\x05\n\x03png\n!\n\x18image/object/class/label\x12\x05\x1a\x03\n\x01\x01\n\x16\n\x0cimage/height\x12\x06\x1a\x04\n\x02\x98\x03\nM\n\x17image/object/class/text\x122\n0\n.Qma8oN1eQwAiKUQZJRXry1VD2yCwYWnZQ6rtQwsC8LzjDu\nR\n\x0fimage/source_id\x12?\n=\n;data/png/Qma8oN1eQwAiKUQZJRXry1VD2yCwYWnZQ6rtQwsC8LzjDu.png\n"\n\x16image/object/bbox/ymin\x12\x08\x12\x06\n\x04\x00\x00\x00\x00\n\xf2\xac\x01\n\rimage/encoded\x12\xdf\xac\x01\n\xdb\xac\x01\n\xd7\xac\x01\x89PNG\r\n
Thank you!

How to check label conversion to tfrecord happened successfully or not?

I have converted a image and the bound box co-ordinates in the form a TFRecord file. How to ensure that the file I have obtained has the required data i.e the label conversion happened successfully?
The bounding box center, height, width, class id were stored in a txt file. I read those data into lists to obtain xmin, xmax, ymin, ymax. And then I tried to convert it into a tfrecord file.
import base64
import tensorflow as tf
from object_detection.utils import dataset_util
flags = tf.app.flags
flags.DEFINE_string('output_path','D:\\filecord.tfrecords', 'Path to output TFRecord')
FLAGS = flags.FLAGS
def create_tf_example():
# TODO START: Populate the following variables from your example.
with open(r'C:\Users\SP-TestMc\Downloads\task1_folder\23324_PID_7660_000.txt', 'r') as file:
rows = [[float(x) for x in line.split(' ')] for line in file]
cols = [list(col) for col in zip(*rows)]
l=len(cols[0])
xmin=[]
xmax=[]
ymin=[]
ymax=[]
cid=[]
for i in range(l):
xmin.append(cols[1][i]-cols[3][i]/2)
xmax.append(cols[1][i]+cols[3][i]/2)
ymin.append(cols[2][i]-cols[4][i]/2)
ymax.append(cols[2][i]+cols[4][i]/2)
cid.append(int(cols[0][i]))
height = 416 # Image height
width = 416 # Image width
filename =r'C:\Users\SP-TestMc\Downloads\task1_folder\23324_PID_7660_000.png' # Filename of the image. Empty if image is not from file
filename=filename.encode()
img_file = open(filename, 'rb')
encoded_image_data = base64.b64encode(img_file.read())
image_format = b'png' # b'jpeg' or b'png'
xmins = xmin # List of normalized left x coordinates in bounding box (1 per box)
xmaxs = xmax # List of normalized right x coordinates in bounding box
# (1 per box)
ymins = ymin # List of normalized top y coordinates in bounding box (1 per box)
ymaxs = ymax # List of normalized bottom y coordinates in bounding box
# (1 per box)
classes_text = ['a'.encode(), 'a'.encode(),'b'.encode(),'b'.encode()] # List of string class name of bounding box (1 per box)
classes = cid # List of integer class id of bounding box (1 per box)
# TODO END
tf_label_and_data = tf.train.Example(features=tf.train.Features(feature={
'image/height': dataset_util.int64_feature(height),
'image/width': dataset_util.int64_feature(width),
'image/filename': dataset_util.bytes_feature(filename),
'image/source_id': dataset_util.bytes_feature(filename),
'image/encoded': dataset_util.bytes_feature(encoded_image_data),
'image/format': dataset_util.bytes_feature(image_format),
'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
'image/object/class/label': dataset_util.int64_list_feature(classes),
}))
return tf_label_and_data
def main(_):
writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
tf_example = create_tf_example()
writer.write(tf_example.SerializeToString())
writer.close()
a=None
main(a)