Creating TFRecord file causes UnicodeDecodeError when being read - tensorflow

I'm following the directions to create a TFRecord file in this example for object detection:
https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md
I have created a Jupyter Notebook with Python 3.6.4 and TensorFlow 1.6.0 with these instructions.
I changed the value assignment inside create_tf_example to enter the correct information from my example (which is a PIL Image):
def create_tf_example(example):
    """Build a ``tf.train.Example`` for one PIL image with a single full-frame box.

    Args:
        example: A PIL image; its ``height``, ``width`` and ``filename``
            attributes are read and the image is re-encoded as PNG.

    Returns:
        A ``tf.train.Example`` carrying the PNG bytes, the image size and one
        bounding box labelled ``Test`` (class id 1) that covers the whole image.
    """
    height = example.height
    width = example.width
    filename = tf.compat.as_bytes(example.filename)

    # Serialize the image to PNG in memory. getvalue() already returns raw
    # bytes, so no further text encoding step is needed (or meaningful).
    png_buffer = io.BytesIO()
    example.save(png_buffer, format='PNG')
    encoded_image_data = png_buffer.getvalue()
    image_format = b'png'

    # Box coordinates are stored normalized to [0, 1], so a box spanning the
    # whole image is (0, 0)-(1, 1) rather than (0, 0)-(width, height).
    xmins = [0.0]
    xmaxs = [1.0]
    ymins = [0.0]
    ymaxs = [1.0]
    classes_text = [b'Test']
    classes = [1]

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
However, it will create the file without issue and when I try to read it again, I'm getting an error. This is the same error when I try to read the TFRecord file later with TensorFlow (label_map_util.load_labelmap(PATH_TO_LABELS)):
open('data/tfrecord/label_map.pbtxt').read()
---------------------------------------------------------------------------
UnicodeDecodeError Traceback (most recent call last)
<ipython-input-26-dfa57505da97> in <module>()
----> 1 open('data/tfrecord/label_map.pbtxt').read()
~/Documents/.../bin/../lib/python3.6/codecs.py in decode(self, input, final)
319 # decode input (taking the buffer into account)
320 data = self.buffer + input
--> 321 (result, consumed) = self._buffer_decode(data, self.errors, final)
322 # keep undecoded input until the next call
323 self.buffer = data[consumed:]
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xbe in position 10: invalid start byte
It isn't clear to me what I should be doing differently, because tf_example.SerializeToString() seems to handle the encoding to a string in the example.
Here is the output if it helps from open('data/tfrecord/label_map.pbtxt', 'rb').read():
b'kX\x00\x00\x00\x00\x00\x00\x05#\xbe\xe0\n\xe7\xb0\x01\n\x15\n\x0bimage/width\x12\x06\x1a\x04\n\x02\x98\x03\n\x17\n\x0cimage/format\x12\x07\n\x05\n\x03png\n!\n\x18image/object/class/label\x12\x05\x1a\x03\n\x01\x01\n\x16\n\x0cimage/height\x12\x06\x1a\x04\n\x02\x98\x03\nM\n\x17image/object/class/text\x122\n0\n.Qma8oN1eQwAiKUQZJRXry1VD2yCwYWnZQ6rtQwsC8LzjDu\nR\n\x0fimage/source_id\x12?\n=\n;data/png/Qma8oN1eQwAiKUQZJRXry1VD2yCwYWnZQ6rtQwsC8LzjDu.png\n"\n\x16image/object/bbox/ymin\x12\x08\x12\x06\n\x04\x00\x00\x00\x00\n\xf2\xac\x01\n\rimage/encoded\x12\xdf\xac\x01\n\xdb\xac\x01\n\xd7\xac\x01\x89PNG\r\n
Thank you!

Related

Trying to generate a TFRecord but I'm getting this error "FileNotFoundError: [Errno 2] No such file or directory"

I'm following this link https://colab.research.google.com/drive/11ko0DBnI1QLxVoJQR8gt9b4JDcvbCrtU#scrollTo=A_tyvKnBP6qD to build my object detector. I am using Google Colab. My workspace structure is exactly as described in this link. Everything was going fine until this block of code:
from object_detection.utils import dataset_util
%cd /content/drive/MyDrive/Gun_Detection/models
# The base directory must end with a separator: every path below is built by
# plain string concatenation, so without it 'data' + 'images/' becomes
# '.../dataimages/' and 'data' + 'train_labels.csv' becomes '.../datatrain_labels.csv'.
data_base_url = '/content/drive/MyDrive/Gun_Detection/data/'
image_dir = data_base_url + 'images/'
def class_text_to_int(row_label):
    """Map a class name to its integer label id.

    Args:
        row_label: Class name taken from the annotation CSV.

    Returns:
        1 for ``'pistol'``; ``None`` for any other label.
    """
    if row_label == 'pistol':
        return 1
    # Unknown labels yield None explicitly (the original relied on falling off
    # the end of the function after a no-op ``None`` expression).
    return None
def split(df, group):
    """Group annotation rows so that each image yields one record.

    Args:
        df: DataFrame of annotations.
        group: Column name passed to ``DataFrame.groupby``.

    Returns:
        A list of ``data(filename, object)`` namedtuples, one per group key,
        where ``object`` is the sub-DataFrame holding that key's rows.
    """
    data = namedtuple('data', ['filename', 'object'])
    gb = df.groupby(group)
    # Iterating the groups mapping yields each key once; zipping the keys with
    # the mapping itself only paired every key with an identical copy.
    return [data(key, gb.get_group(key)) for key in gb.groups]
def create_tf_example(group, path):
    """Assemble the ``tf.train.Example`` for one image and all of its boxes.

    Args:
        group: Record with ``filename`` (image file name) and ``object``
            (DataFrame rows with ``xmin``/``xmax``/``ymin``/``ymax``/``class``).
        path: Directory that contains the image file.

    Returns:
        A ``tf.train.Example`` holding the encoded image bytes, its size, and
        the box coordinates divided by the image width/height.
    """
    image_path = os.path.join(path, '{}'.format(group.filename))
    with tf.io.gfile.GFile(image_path, 'rb') as fid:
        encoded_jpg = fid.read()
    # The image is opened only to learn its dimensions.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'

    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    classes_text, classes = [], []
    for _, row in group.object.iterrows():
        label = row['class']
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(label.encode('utf8'))
        classes.append(class_text_to_int(label))

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
# Convert each annotation CSV into a TFRecord file of the same base name.
for csv in ['train_labels', 'test_labels']:
    # os.path.join supplies the separator, so the paths are correct whether or
    # not data_base_url ends with a slash.
    record_path = os.path.join(data_base_url, csv + '.record')
    path = os.path.join(image_dir)
    # Read the annotations before opening the writer: a missing CSV then fails
    # without leaving an empty .record file behind.
    examples = pd.read_csv(os.path.join(data_base_url, csv + '.csv'))
    grouped = split(examples, 'filename')
    # The context manager closes (and flushes) the writer even if an example
    # fails to build.
    with tf.io.TFRecordWriter(record_path) as writer:
        for group in grouped:
            tf_example = create_tf_example(group, path)
            writer.write(tf_example.SerializeToString())
    output_path = os.path.join(os.getcwd(), record_path)
    print('Successfully created the TFRecords: {}'.format(record_path))
After this error, a file named datatrain_label.record was generated in my Gun Detection folder on my Drive. I am confused 😕 and I can't proceed further. Please help!
N.B: I am not pro with python and am still learning. Trying hard to understand the code but I honestly don't.
Provide the data_base_url path as '/content/drive/MyDrive/Gun_Detection/data/'. You are missing a / at the end of it, so the concatenated paths are wrong and your code cannot find the image_dir.
Use the os.path.join() function to build paths in order to avoid such cases.

Tensorflow TFWriter incorrect data serialization

I have a dataset created using ImageLabeller from MatLab, when trying to translate the dataset to TFrecord, according to the instructions in here, some of the coordinates are incorrect, it appears as if the min is greater than the max.
I have tried removing the examples that fail but it seems the error is not related to that, failed examples always appear in the same position. I have tried with images from the MODD2 and from a dataset created with the imageLabeller using larger images and it works correctly.
The code used for the generation of the TFrecord files is the following:
# MODD2 format: x y w h -> x,y are the top left corner coordinates
def read_drone_mat_file(file_number):
    """Load the drone and obstacle boxes stored in one annotation .mat file.

    Args:
        file_number: Index into ``os.listdir(drones_dir)`` selecting the file.

    Returns:
        A tuple ``(bbox_d, bbox_o, filename)``: the entries of the file's
        ``'drone'`` and ``'obstacles'`` arrays as lists, and a one-element
        list holding the first nine characters of the file name.
    """
    mat = os.listdir(drones_dir)[file_number]
    frame = os.path.join(drones_dir, mat)
    # mat_dtype=True loads the arrays with the dtype MATLAB would use (double)
    # instead of the compact dtype they were saved with. With the default, the
    # boxes arrive as uint8 and sums such as y + h silently wrap past 255.
    data = sio.loadmat(frame, mat_dtype=True)
    bbox_d = list(data['drone'])
    bbox_o = list(data['obstacles'])
    filename = [mat[0:9]]
    return bbox_d, bbox_o, filename
# %% Helper function to create a tfexample for the drone data
def create_drone_tfexample(drones, obstacles, index, image_path):
    """Build the ``tf.train.Example`` for one image and its drone/obstacle boxes.

    Args:
        drones: Iterable of ``[x, y, w, h]`` boxes (top-left corner plus size,
            in pixels) labelled ``drone`` (class id 2).
        obstacles: Iterable of ``[x, y, w, h]`` boxes labelled ``obstacles``
            (class id 1).
        index: Position of the annotation file; the image is taken from
            ``os.listdir(image_path)[index+2]``.
        image_path: Directory that contains the images.

    Returns:
        A ``tf.train.Example`` with the encoded image, its size, a running
        numeric source id and the boxes normalized by image width/height.
    """
    image_format = b'jpg'
    # NOTE(review): the +2 offset presumably skips two non-image directory
    # entries, and relies on os.listdir ordering -- confirm against the data.
    filename = os.listdir(image_path)[index+2]
    # load corresponding image (only use left images)
    with tf.io.gfile.GFile(os.path.join(image_path, filename), 'rb') as fid:
        encoded_jpg = fid.read()
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    filename = os.path.splitext(filename)[0].encode('utf-8')
    # The function attribute serves as a counter that persists across calls.
    create_drone_tfexample.source_id += 1
    source_id_s = "{}".format(create_drone_tfexample.source_id).encode('utf-8')

    # tfrecord features definition
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []
    labelled_boxes = (
        (drones, b'drone', 2),
        (obstacles, b'obstacles', 1),
    )
    for boxes, label_text, label_id in labelled_boxes:
        for obj in boxes:
            # Convert to Python floats before adding: if the boxes arrive as
            # small integer types (e.g. uint8), x + w or y + h would wrap
            # around and produce max < min.
            # Assumes each box holds scalar x, y, w, h in its first four slots.
            x, y, w, h = (float(value) for value in obj[:4])
            xmins.append(x / width)
            xmaxs.append((x + w) / width)
            ymins.append(y / height)
            ymaxs.append((y + h) / height)
            classes_text.append(label_text)
            classes.append(label_id)
    print(source_id_s+b": "+filename)

    # create tf_example
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(source_id_s),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example


# Initialise the per-function source id counter before the first call.
create_drone_tfexample.source_id = 0
# %% Create final dataset WARNING: Slow and destructive
# Three TFRecord outputs are opened under output_dir: a train file, a test
# file, and a drone-only test file.
# NOTE(review): no close() call on any of these writers is visible in this
# snippet -- confirm they are closed afterwards so all records are flushed.
train_writer = tf.io.TFRecordWriter(
output_dir+'drone_train_truncated.tfrecord')
test_writer = tf.io.TFRecordWriter(output_dir+'drone_test_truncated.tfrecord')
drone_test_writer = tf.io.TFRecordWriter(
output_dir + 'drone_only_test.tfrecord')
# Restart the running source id used by create_drone_tfexample.
create_drone_tfexample.source_id = 0
# Drones dataset
# The directory listing is used only for its length/index here; the file
# itself is re-selected by index inside read_drone_mat_file.
for index, mat in enumerate(os.listdir(drones_dir)):
boxes_d, boxes_o, filename = read_drone_mat_file(index)
print()
# Pass the bounding boxes to the create_tfexample function
# NOTE(review): indentation is not preserved in this listing, so which of the
# statements below are guarded by `index < 210` cannot be told from here.
if index < 210:
image_path = drones_image_root
tf_example = create_drone_tfexample(
boxes_d, boxes_o, index, image_path)
# Write the tf_example into the dataset
# A draw of 1..80 out of 1..100 sends the example to the train file,
# otherwise to the test file.
if random.randint(1, 100) <= 80: # 80% Train 20% Validation
train_writer.write(tf_example.SerializeToString())
else:
test_writer.write(tf_example.SerializeToString())
drone_test_writer.write(tf_example.SerializeToString())
The examples fail when trying to use them for training, to read the examples, I use the following code:
# %% Extract images from dataset
dataset_file = "drone_only_test.tfrecord"
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
raw_dataset = tf.data.TFRecordDataset("<path_to_dataset>"+dataset_file)
print('_______________________________________________________________________________________')

# Parsing schema for the records: one entry per feature key written by the
# TFRecord generation code. Scalars are fixed-length, per-box lists are
# variable-length.
image_feature_description = {}
for key in ('image/height', 'image/width'):
    image_feature_description[key] = tf.io.FixedLenFeature([], tf.int64)
for key in ('image/filename', 'image/source_id',
            'image/encoded', 'image/format'):
    image_feature_description[key] = tf.io.FixedLenFeature([], tf.string)
for key in ('image/object/bbox/xmin', 'image/object/bbox/xmax',
            'image/object/bbox/ymin', 'image/object/bbox/ymax'):
    image_feature_description[key] = tf.io.VarLenFeature(tf.float32)
image_feature_description['image/object/class/text'] = tf.io.VarLenFeature(tf.string)
image_feature_description['image/object/class/label'] = tf.io.VarLenFeature(tf.int64)
def _parse_image_function(example_proto):
    """Decode one serialized ``tf.train.Example`` with ``image_feature_description``.

    Args:
        example_proto: A serialized example taken from the raw dataset.

    Returns:
        The result of ``tf.io.parse_single_example`` for that record.
    """
    parsed = tf.io.parse_single_example(example_proto, image_feature_description)
    return parsed
# Parse the first ten records, show each image and print its boxes in pixels.
parsed_image_dataset = raw_dataset.map(_parse_image_function)
for image_features in parsed_image_dataset.take(10):
    image_raw = image_features['image/encoded'].numpy()
    display.display(display.Image(data=image_raw))
    # Re-save the decoded image so it can be inspected on disk as well.
    image = Image.open(io.BytesIO(image_raw))
    image.save("out.jpg", format="JPEG")

    # Stored coordinates are multiplied by fixed factors of 640 and 480.
    xmin_px = image_features["image/object/bbox/xmin"].values*640
    xmax_px = image_features["image/object/bbox/xmax"].values*640
    ymin_px = image_features["image/object/bbox/ymin"].values*480
    ymax_px = image_features["image/object/bbox/ymax"].values*480

    print(f'ID: {image_features["image/filename"]}')
    print(f'XMIN: {xmin_px}')
    print(f'XMAX: {xmax_px}')
    print(f'YMIN: {ymin_px}')
    print(f'YMAX: {ymax_px}')
    print('---------------------')
    print(f'WIDTH: {xmax_px - xmin_px}')
    print(f'HEIGHT: {ymax_px - ymin_px}')
For the example in the fourth position, the output is the following:
ID: b'color_00000036'
XMIN: [179. 175. 5.]
XMAX: [387. 210. 21.]
YMIN: [263. 193. 242.]
YMAX: [372. 6. 248.]
---------------------
WIDTH: [208. 35. 16.]
HEIGHT: [ 109. -187. 6.]
The matlab output for the same image is the following:
ground_truth =
179 175 5
263 193 242
208 35 16
109 69 6
The versions used are the following:
Windows 10 64-bit
Python 3.7.9 64-bit
Tensorflow 2.4.0
Scipy 1.5.4
Tensorflow Object Detection API master
The issue ended up being related to scipy.io.loadmat() casting the data into np.uint8, the solution is to pass mat_dtype=True as a parameter so it loads everything as np.float64.
Not the most efficient thing but it works.
Thank you very much.

Tensorflow Object-Detection Fine-Tuning leads to incorrect accuracy values

I am working with the Tensorflow Object-Detection API and want to use a pre-trained Faster R-CNN Resnet101 model on Kitti image data and fine-tune it on Cityscapes image data. I downloaded the pre-trained model here.
This script creates the tfrecord files. I use this script to create tfrecord files from Cityscape (CS) images.
The CS tf_records are afterward used in order to fine-tune the pre-trained Resnet model. For this task, I use this
python3.5 model_main.py --pipeline_config_path={Path to config file in ../samples/configs/} --model_dir={Output directory} --num_train_steps={Train Steps} --sample_1_of_n_eval_examples=1 --alsologtostderr
Using only CS training and validation data leads to a COCO accuracy of -1.000
Average Precision (AP) #[ IoU=0.5:0.95 | area=all | maxDets=100 ] = -1.000
....
I tried different things:
Train on CS data and validate on Kitti data. This leads to a COCO accuracy that is not -1.000 but very low: between 0.01 and 1.5% (after 10.000 training steps).
Looked at Tensorboard visualizations. The loss falls from 0.05 to 0.01 over the first 1.500 iterations and stays over the last 8.500 iterations around 2.5e-4 and does not change much. (I would upload an image if I would know how..)
Fine-tuned the pre-trained model with manipulated Kitti data. I changed the content of the script that creates the Kitti tfrecord files. By this, I mean I deleted all of the useless variables (like 3D annotations and so on) in the tfrecord data in order to have similar content to the CS tfrecords I created (code below). Using these manipulated Kitti data also leads to a validation accuracy that seems to be normal (around 70-80%). Therefore, I expect that this error is not caused by a missing attribute in the tfrecords.
An inference of the CS data on the pre-trained Resnet model leads to an accuracy around 20% and this is what I expect. Kitti inference leads to an accuracy around 85%.
Using CS tfrecords with the following content per image:
tf_example = tf.train.Example(features=tf.train.Features(feature={
'image/height': dataset_util.int64_feature(height),
'image/width': dataset_util.int64_feature(width),
'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')),
'image/encoded': dataset_util.bytes_feature(encoded_image_data),
'image/format': dataset_util.bytes_feature(image_format.encode('utf8')),
'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
'image/object/class/label': dataset_util.int64_list_feature(classes),
}))
return tf_example
Using this code to encode an image
with tf.gfile.GFile(os.path.join(image_path, '{}'.format(currentImageName)), 'rb') as fid:
encoded_image_data = fid.read()
encoded_image_io = io.BytesIO(encoded_image_data)
Could the coding of the data be the reason? Or what could be another source for an error? As mentioned, I tried several things and none of them worked as expected. Fine-tuning should not be that hard or do I miss any point?
As mentioned in point 4, I tested the inference and the tf_record files and therefore, I expect that it is possible to fine-tune the model.
In general, I expect that the accuracy is not close to 0% after 10.000 iterations.
Everything looks a bit strange and I do not know what the error is. Therefore, I would appreciate each hint/remark/solution for this issue.
EDIT:
def create_tf_example(currentName, anno_path, image_path):
    """Build the ``tf.train.Example`` for one annotation file and its PNG image.

    Each non-empty annotation line is read as ``xmin ymin xmax ymax classID``
    in pixels. Class id 0 maps to ``Car`` and 1 to ``Person``; stored labels
    are shifted by one because label 0 is reserved for the background.

    Args:
        currentName: Annotation file name; the image is the same base name
            (text before the first dot) with a ``.png`` extension.
        anno_path: Directory containing the annotation file.
        image_path: Directory containing the image.

    Returns:
        A ``tf.train.Example`` with the encoded image, its size and the
        normalized boxes with their class text, label and difficult flag.
    """
    currentNameSplit = currentName.split('.')[0]
    currentImageName = currentNameSplit + '.png'
    filename = os.path.join(image_path, '{}'.format(currentImageName))
    with tf.gfile.GFile(filename, 'rb') as fid:
        encoded_image_data = fid.read()
    # Only the dimensions are needed, so read them from the image object
    # instead of decoding every pixel into an array.
    width, height = Image.open(io.BytesIO(encoded_image_data)).size
    image_format = 'png'  # b'jpeg' or b'png'

    class_names = {0: 'Car', 1: 'Person'}

    # os.path.join works whether or not anno_path ends with a separator.
    with open(os.path.join(anno_path, currentName)) as file:
        lines = file.readlines()

    xmins = []  # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = []  # List of normalized right x coordinates in bounding box (1 per box)
    ymins = []  # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = []  # List of normalized bottom y coordinates in bounding box (1 per box)
    classes_text = []  # List of string class name of bounding box (1 per box)
    classes = []  # List of integer class id of bounding box (1 per box)

    for line in lines:
        fields = line.split()
        if not fields:
            # Tolerate blank lines, e.g. a trailing newline at end of file.
            continue
        print('Lines[li]: {}'.format(line))
        classID = int(fields[4])
        if classID not in class_names:
            # Validate the class BEFORE storing the box. Appending the
            # coordinates first leaves the box lists longer than the class
            # lists, which misaligns every following box with its label.
            print('Error with Image Annotations in {}'. format(currentName))
            continue
        xmins.append(float(fields[0]) / width)
        xmaxs.append(float(fields[2]) / width)
        ymins.append(float(fields[1]) / height)
        ymaxs.append(float(fields[3]) / height)
        classes_text.append(class_names[classID].encode('utf8'))
        classes.append(classID+1)  # add 1 because class 0 is always reserved for 'background'

    difficult_obj = [0] * len(xmins)
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(image_format.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
def main(_):
    """Split the sorted annotation files into train/valid/test TFRecord files.

    Files are assigned by position in the sorted listing of ``*.txt`` files in
    ``FLAGS.anno_path``: indices 0-2410 go to training, 2411-2971 to
    validation and 2972-3475 to test; anything beyond that is ignored.
    """
    # Writers are opened before os.chdir so that relative output paths keep
    # resolving against the original working directory. Each writer is bound
    # to the flag that matches its name (valid/test were swapped before).
    writer_training = tf.python_io.TFRecordWriter(FLAGS.output_path_Training)
    writer_valid = tf.python_io.TFRecordWriter(FLAGS.output_path_Valid)
    writer_test = tf.python_io.TFRecordWriter(FLAGS.output_path_Test)
    try:
        os.chdir(FLAGS.anno_path)
        allAnnotationFiles = sorted(glob.glob("*.{}".format('txt')))
        for counter, currentName in enumerate(allAnnotationFiles):
            if counter < 2411:
                writer = writer_training
            elif counter < 2972:
                # The former `counter > 2411` test let index 2411 fall through
                # to the test split; the ranges are now contiguous.
                writer = writer_valid
            elif counter <= 3475:
                writer = writer_test
            else:
                break
            tf_example = create_tf_example(currentName, FLAGS.anno_path, FLAGS.image_path)
            writer.write(tf_example.SerializeToString())
    finally:
        # Close all writers even if an example fails to build, so records
        # written so far are flushed.
        writer_training.close()
        writer_test.close()
        writer_valid.close()


if __name__ == '__main__':
    tf.app.run()

raise ValueError('Image with id {} already added.'.format(image_id)) in Tensorflow object detection api

Image training is ok with ssd_mobilenet_v1_coco in tensorflow object detection api.
getting the error while testing:
File "/home/hipstudents/anaconda3/envs/tensorflow_gpuenv/lib/python3.6/site-packages/object_detection-0.1-py3.6.egg/object_detection/utils/object_detection_evaluation.py", line 203, in add_single_ground_truth_image_info
raise ValueError('Image with id {} already added.'.format(image_id))
Please help.
System Info:
What is the top-level directory of the model you are using: ~/
Have I written custom code (as opposed to using a stock example script provided in TensorFlow): Yes, written scripts to convert .xml files to tf record
OS Platform and Distribution (e.g., Linux Ubuntu 16.04): Linux Ubuntu 16.04
TensorFlow installed from (source or binary): Compiled from source
TensorFlow version (use command below): 1.11.0
Bazel version (if compiling from source): 0.16.1
CUDA/cuDNN version: 9.0.176, cuDNN: 9.0
GPU model and memory: GeForce GTX1080Ti, 11GB
Exact command to reproduce: python eval.py --logtostderr --pipeline_config_path=training/ssd_mobilenet_v1_coco.config --checkpoint_dir=training/ --eval_dir=eval/
I created the dataset manually and labeled it using labelImg. After labeling, I created a CSV file with the image annotations and file names, and then I created the TFRecord files. I followed this tutorial: https://towardsdatascience.com/how-to-train-your-own-object-detector-with-tensorflows-object-detector-api-bec72ecfe1d9
My tfrecord generator for training and testing image:
"""
Usage:
# From tensorflow/models/
# Create train data:
python generate_tfrecord.py --csv_input=data/train_labels.csv --output_path=train.record
# Create test data:
python generate_tfrecord.py --csv_input=data/test_labels.csv --output_path=test.record
"""
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
import os
import io
import pandas as pd
import tensorflow as tf
from PIL import Image
from object_detection.utils import dataset_util
from collections import namedtuple, OrderedDict
# Command-line flags; main() reads them as FLAGS.csv_input and FLAGS.output_path.
flags = tf.app.flags
# CSV file with one annotation row per bounding box.
flags.DEFINE_string('csv_input', '', 'Path to the CSV input')
# Destination of the generated TFRecord file.
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
FLAGS = flags.FLAGS
# TO-DO replace this with label map
def class_text_to_int(row_label):
    """Map a class name to its integer label id.

    Args:
        row_label: Class name taken from the annotation CSV.

    Returns:
        1 for ``'Field'``; ``None`` for any other label.
    """
    if row_label == 'Field':
        return 1
    # Unknown labels yield None explicitly (the original relied on falling off
    # the end of the function after a no-op ``None`` expression).
    return None
def split(df, group):
    """Group annotation rows so that each image yields one record.

    Args:
        df: DataFrame of annotations.
        group: Column name passed to ``DataFrame.groupby``.

    Returns:
        A list of ``data(filename, object)`` namedtuples, one per group key,
        where ``object`` is the sub-DataFrame holding that key's rows.
    """
    data = namedtuple('data', ['filename', 'object'])
    gb = df.groupby(group)
    # Each key is looked up once; zipping the keys with the mapping itself
    # only produced two identical copies of every key.
    return [data(key, gb.get_group(key)) for key in gb.groups]
def create_tf_example(group, path):
    """Create the ``tf.train.Example`` describing one image and its boxes.

    Args:
        group: Record with ``filename`` (image file name) and ``object``
            (DataFrame rows with ``xmin``/``xmax``/``ymin``/``ymax``/``class``).
        path: Directory that contains the image file.

    Returns:
        A ``tf.train.Example`` with the encoded image bytes, the image size,
        and per-box coordinates divided by the image width/height.
    """
    source = os.path.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(source, 'rb') as fid:
        encoded_jpg = fid.read()
    # Decode just enough to learn the image dimensions.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'

    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    classes_text, classes = [], []
    for _, row in group.object.iterrows():
        class_name = row['class']
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(class_name.encode('utf8'))
        classes.append(class_text_to_int(class_name))

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def main(_):
    """Convert the annotations in ``FLAGS.csv_input`` into one TFRecord file.

    Images are read from the ``Images`` directory under the current working
    directory; one example is written per distinct ``filename`` in the CSV.
    """
    path = os.path.join(os.getcwd(), 'Images')
    # Load and group the annotations first, so a bad CSV path fails before an
    # empty output file is created.
    examples = pd.read_csv(FLAGS.csv_input)
    grouped = split(examples, 'filename')
    # The context manager closes the writer even when an example fails.
    with tf.python_io.TFRecordWriter(FLAGS.output_path) as writer:
        for group in grouped:
            tf_example = create_tf_example(group, path)
            writer.write(tf_example.SerializeToString())
    output_path = os.path.join(os.getcwd(), FLAGS.output_path)
    print('Successfully created the TFRecords: {}'.format(output_path))


if __name__ == '__main__':
    tf.app.run()
In the ssd_mobilenet_coco_v1.config file, num_examples was 8000. In my case, the test dataset has only 121 samples. I forgot to update that value and got a new kind of error that I couldn't find on the Internet. As it is a silly mistake, I think very few people have made it, but this answer might help someone who does. I changed the following in the config file and the error was resolved:
eval_config: {
#num of test images. In my case 121. Previously It was 8000
num_examples: 121
# Note: The below line limits the evaluation process to 10 evaluations.
# Remove the below line to evaluate indefinitely.
max_evals: 10
}
In my case, the problem was that I'd included images multiple times when constructing tfrecord files. Though now obvious, I hadn't noticed that many categories of the Open Images Dataset share the same images (which would have the same id in the evaluation, thus the error...). Once I'd corrected the algorithm creating the tfrecords, the error was gone.
I have solved the problem with this article: https://www.coder.work/article/3120495
By just adding 2 lines:
eval_config {
num_examples: 50
use_moving_averages: false
metrics_set: "coco_detection_metrics"
}

How to check label conversion to tfrecord happened successfully or not?

I have converted an image and its bounding box coordinates into a TFRecord file. How can I ensure that the file I have obtained contains the required data, i.e. that the label conversion happened successfully?
The bounding box center, height, width, class id were stored in a txt file. I read those data into lists to obtain xmin, xmax, ymin, ymax. And then I tried to convert it into a tfrecord file.
import base64
import tensorflow as tf
from object_detection.utils import dataset_util
# Command-line flag definitions; main() reads the value as FLAGS.output_path.
flags = tf.app.flags
# Destination of the generated TFRecord file (defaults to a path on drive D:).
flags.DEFINE_string('output_path','D:\\filecord.tfrecords', 'Path to output TFRecord')
FLAGS = flags.FLAGS
def create_tf_example():
    """Build the ``tf.train.Example`` for one hard-coded image/label pair.

    Each non-empty label line is read as five numbers: class id, box centre
    x, box centre y, and the box extents along x and y. Centre/extent boxes
    are converted to min/max corners.

    Returns:
        A ``tf.train.Example`` with the PNG bytes, the image size, the box
        corners, and the class ids and names.
    """
    # TODO START: Populate the following variables from your example.
    label_path = r'C:\Users\SP-TestMc\Downloads\task1_folder\23324_PID_7660_000.txt'
    image_path = r'C:\Users\SP-TestMc\Downloads\task1_folder\23324_PID_7660_000.png'

    with open(label_path, 'r') as file:
        # split() without an argument tolerates repeated spaces and the
        # trailing newline; blank lines are skipped.
        rows = [[float(x) for x in line.split()] for line in file if line.strip()]

    xmins = []  # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = []  # List of normalized right x coordinates in bounding box (1 per box)
    ymins = []  # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = []  # List of normalized bottom y coordinates in bounding box (1 per box)
    classes = []  # List of integer class id of bounding box (1 per box)
    for row in rows:
        class_id, x_center, y_center, x_extent, y_extent = row[:5]
        xmins.append(x_center - x_extent / 2)
        xmaxs.append(x_center + x_extent / 2)
        ymins.append(y_center - y_extent / 2)
        ymaxs.append(y_center + y_extent / 2)
        classes.append(int(class_id))

    height = 416  # Image height
    width = 416  # Image width
    filename = image_path.encode()  # Filename of the image. Empty if image is not from file

    # Store the encoded PNG bytes as they are on disk. base64-encoding them
    # yields text that an image decoder cannot read back as a PNG.
    with open(image_path, 'rb') as img_file:
        encoded_image_data = img_file.read()
    image_format = b'png'  # b'jpeg' or b'png'

    # NOTE(review): the class names are hard-coded for exactly four boxes and
    # are not derived from the class ids -- confirm they match the label file.
    classes_text = [b'a', b'a', b'b', b'b']  # List of string class name of bounding box (1 per box)
    # TODO END

    tf_label_and_data = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_label_and_data
def main(_):
    """Write the single example from ``create_tf_example`` to ``FLAGS.output_path``."""
    # Build the example first so a failure does not leave an empty file, and
    # let the context manager close the writer in every case.
    tf_example = create_tf_example()
    with tf.python_io.TFRecordWriter(FLAGS.output_path) as writer:
        writer.write(tf_example.SerializeToString())


# main() ignores its argument, so a placeholder value is passed.
a=None
main(a)