TensorFlow Object Detection API Data Augmentation Settings (TensorFlow 2.0)

I'm training detection models and am unclear on how the data augmentation steps should be specified. My model's config file currently contains:
train_config {
batch_size: 4
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_vertical_flip {
}
}
data_augmentation_options {
random_adjust_brightness {
}
}
data_augmentation_options {
random_black_patches {
}
}
data_augmentation_options {
random_crop_image {
min_object_covered: 0.0
min_aspect_ratio: 0.75
max_aspect_ratio: 3.0
min_area: 0.75
max_area: 1.0
overlap_thresh: 0.0
}
}
}
Is this the correct way to apply data augmentations? Or am I supposed to put all the augmentation settings in a single block, for example:
train_config {
batch_size: 4
data_augmentation_options {
random_crop_image {
min_object_covered: 0.0
min_aspect_ratio: 0.75
max_aspect_ratio: 3.0
min_area: 0.75
max_area: 1.0
overlap_thresh: 0.0
}
random_horizontal_flip {
}
random_vertical_flip {
}
random_adjust_brightness {
}
random_black_patches {
}
}
}
Or does it even matter which method I choose?
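For what it's worth, one can inspect how the parsed config actually groups the ops. A small sketch, assuming the TF Object Detection API is installed and the first snippet above is saved as pipeline.config: each data_augmentation_options entry is a single PreprocessingStep message carrying one op via a oneof (named preprocessing_step in preprocessor.proto, to the best of my knowledge), which is why the one-op-per-block form is the usual layout.
import tensorflow as tf  # noqa: F401  (ensures TF is importable before the API)
from object_detection.utils import config_util

configs = config_util.get_configs_from_pipeline_file('pipeline.config')
for step in configs['train_config'].data_augmentation_options:
    # WhichOneof names the single op this block carries.
    print(step.WhichOneof('preprocessing_step'))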
Also, is there a way to preview one of my images under these augmentation settings, e.g. by displaying it with matplotlib?
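A rough way to eyeball at least the flip and brightness ops is to approximate them with plain tf.image ops rather than the API's own preprocessor (a sketch; example.jpg is a placeholder path, and the random crop / black patches ops are not reproduced here):
import matplotlib.pyplot as plt
import tensorflow as tf

image = tf.io.decode_jpeg(tf.io.read_file('example.jpg'))  # hypothetical image
image = tf.image.random_flip_left_right(image)
image = tf.image.random_flip_up_down(image)
image = tf.image.random_brightness(image, max_delta=0.2)

plt.imshow(image.numpy())
plt.axis('off')
plt.show()
For the exact ops, the API's object_detection.core.preprocessor module implements the functions the config names, though wiring it up takes a full tensor_dict; the approximation above is usually enough for a sanity check.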

Related

WARNING:tensorflow:Ignoring detection with image id 1016176252 since it was previously added

Hi, I'm working with the faster_rcnn_resnet101_v1_1024x1024_coco17_tpu-8 pretrained model and I have problems when evaluating it. The training itself ran without any problems. I start the evaluation of the model with the command:
python model_main_tf2.py --pipeline_config_path=./training_outlook_action_ctx/training_1/pipeline.config --model_dir=./training_outlook_action_ctx/training_1 --checkpoint_dir=./training_outlook_action_ctx/training_1
After the first "Loaded cuDNN version 8400" message, it starts throwing the following warnings, which repeat until the process is interrupted:
WARNING:tensorflow:Ignoring ground truth with image id 1016176252 since it was previously added
W0810 10:17:12.131517 140545620840832 coco_evaluation.py:113] Ignoring ground truth with image id 1016176252 since it was previously added
WARNING:tensorflow:Ignoring detection with image id 1016176252 since it was previously added
W0810 10:17:12.131881 140545620840832 coco_evaluation.py:196] Ignoring detection with image id 1016176252 since it was previously added
WARNING:tensorflow:Ignoring ground truth with image id 1016176252 since it was previously added
W0810 10:17:12.652873 140545620840832 coco_evaluation.py:113] Ignoring ground truth with image id 1016176252 since it was previously added
WARNING:tensorflow:Ignoring detection with image id 1016176252 since it was previously added
W0810 10:17:12.653055 140545620840832 coco_evaluation.py:196] Ignoring detection with image id 1016176252 since it was previously added
WARNING:tensorflow:Ignoring ground truth with image id 1016176252 since it was previously added
Here is my pipeline.config file:
# Faster R-CNN with Resnet-50 (v1)
# Trained on COCO, initialized from Imagenet classification checkpoint
# This config is TPU compatible.
model {
faster_rcnn {
num_classes: 7
image_resizer {
fixed_shape_resizer {
width: 1024
height: 1024
}
}
feature_extractor {
type: 'faster_rcnn_resnet101_keras'
batch_norm_trainable: true
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 300
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_box_predictor {
mask_rcnn_box_predictor {
use_dropout: false
dropout_keep_probability: 1.0
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
share_box_across_classes: true
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
use_static_shapes: true
use_matmul_crop_and_resize: true
clip_anchors_to_image: true
use_static_balanced_label_sampler: true
use_matmul_gather_in_matcher: true
}
}
train_config: {
batch_size: 2
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
num_steps: 200000
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: .04
total_steps: 100000
warmup_learning_rate: .013333
warmup_steps: 2000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
fine_tune_checkpoint_version: V2
fine_tune_checkpoint: "/pretrained_models/faster_rcnn_resnet101_v1_1024x1024_coco17_tpu-8/checkpoint/ckpt-0"
fine_tune_checkpoint_type: "detection"
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_adjust_hue {
}
}
data_augmentation_options {
random_adjust_contrast {
}
}
data_augmentation_options {
random_adjust_saturation {
}
}
data_augmentation_options {
random_square_crop_by_scale {
scale_min: 0.6
scale_max: 1.3
}
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
use_bfloat16: true # works only on TPUs
}
train_input_reader: {
label_map_path: "./training_outlook_action_ctx/data/label_map.pbtxt"
tf_record_input_reader {
input_path: "./training_outlook_action_ctx/data/train.records"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
batch_size: 2
}
eval_input_reader: {
label_map_path: "./training_outlook_action_ctx/data/label_map.pbtxt"
shuffle: false
tf_record_input_reader {
input_path: "./training_outlook_action_ctx/data/train.records"
}
}
OS: Debian GNU/Linux 11 (bullseye)
Python: 3.9.12
Tensorflow: 2.9.1
I tried adding num_examples and max_evals, but no matter how I adjust them it still throws the same warnings.
I should mention that evaluation on a second dataset worked normally for me.
Thanks in advance,
Edi
Hi guys, I found the cause: it was in the script I used to create the images and annotations. Specifically, I used a script that crops out my first-level annotations and creates new XML files for the cropped images, and the filename and path stored in those XML files were wrong (a bug in my script).
After fixing them, the evaluation error disappeared.
I'm happy to answer any questions.
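For anyone hitting the same warnings: the COCO evaluator keys ground truth and detections by image id, which the OD API takes from image/source_id, so duplicates in the eval TFRecord trigger exactly this message. A quick duplicate check (a sketch; the path matches the config above, and it assumes records produced by the standard create_*_tf_record-style scripts, which always set image/source_id):
import collections
import tensorflow as tf

counts = collections.Counter()
for raw in tf.data.TFRecordDataset('./training_outlook_action_ctx/data/train.records'):
    example = tf.train.Example.FromString(raw.numpy())
    source_id = example.features.feature['image/source_id'].bytes_list.value[0]
    counts[source_id] += 1

for source_id, n in counts.items():
    if n > 1:
        print(source_id.decode(), 'appears', n, 'times')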

Fine-tuning EfficientDet-D0 from the model zoo on PASCAL VOC doesn't recognize class label 1 (TensorFlow Object Detection API)

I've downloaded the EfficientDet D0 512x512 model from the Object Detection API model zoo, downloaded the PASCAL VOC dataset, and preprocessed it with the create_pascal_tf_record.py script. Next I took one of the config files and adjusted it to fit the architecture and the VOC dataset. When evaluating the resulting network with pascal_voc_detection_metrics, I get a near-zero mAP for the first class (airplane); the other classes perform fine. I'm assuming one of the settings in my config file (pasted below) is wrong. Why does this happen, and how do I fix it?
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 20
add_background_class: false
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 3
}
}
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 512
max_dimension: 512
pad_to_max_dimension: true
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 64
class_prediction_bias_init: -4.6
conv_hyperparams {
force_use_bias: true
activation: SWISH
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true
decay: 0.99
epsilon: 0.001
}
}
num_layers_before_predictor: 3
kernel_size: 3
use_depthwise: true
}
}
feature_extractor {
type: 'ssd_efficientnet-b0_bifpn_keras'
bifpn {
min_level: 3
max_level: 7
num_iterations: 3
num_filters: 64
}
conv_hyperparams {
force_use_bias: true
activation: SWISH
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.99,
epsilon: 0.001,
}
}
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 1.5
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.5
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint: "oracle/efficientdet_d0/checkpoint/ckpt-0"
fine_tune_checkpoint_version: V2
fine_tune_checkpoint_type: "detection"
batch_size: 3
startup_delay_steps: 0
use_bfloat16: false
num_steps: 30000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_scale_crop_and_pad_to_square {
output_size: 512
scale_min: 0.1
scale_max: 2.0
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: 8e-2
total_steps: 30000
warmup_learning_rate: .001
warmup_steps: 2500
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
update_trainable_variables: ["WeightSharedConvolutionalBoxPredictor"]
}
train_input_reader: {
label_map_path: "pascalVOC/pascal_label_map.pbtxt"
tf_record_input_reader {
input_path: "pascalVOC/pascal_train.record"
}
}
eval_config: {
metrics_set: "pascal_voc_detection_metrics"
use_moving_averages: false
batch_size: 1;
}
eval_input_reader: {
label_map_path: "pascalVOC/pascal_label_map.pbtxt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "pascalVOC/pascal_val.record"
}
}
There is a bug in the way pascal_voc_detection_metrics calculates the metric; a fix can be found here.
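Independent of that bug, it can be worth double-checking what class id 1 actually maps to in the label map being used. A small sketch using the API's label_map_util, with the path from the config above:
from object_detection.utils import label_map_util

category_index = label_map_util.create_category_index_from_labelmap(
    'pascalVOC/pascal_label_map.pbtxt', use_display_name=True)
# For the standard PASCAL label map, id 1 should be 'aeroplane'.
print(category_index[1])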

Tensorflow: label_map_item = keypoint_map_dict[kp_config.keypoint_class_name] KeyError: 'item'

I'm trying to do object detection with TensorFlow, following this tutorial:
https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/index.html
Now I'm nearly at the end and need to run my pipeline.config. When I run it, I get the following error:
model_builder.py", line 844, in keypoint_proto_to_params
    label_map_item = keypoint_map_dict[kp_config.keypoint_class_name]
KeyError: 'item'
My pipeline.config file looks like this:
# CenterNet meta-architecture from the "Objects as Points" [2] paper with the
# hourglass[1] backbone. This config achieves an mAP of 41.92 +/- 0.16 on
# COCO 17 (averaged over 5 runs). This config is TPU compatible.
# [1]: https://arxiv.org/abs/1603.06937
# [2]: https://arxiv.org/abs/1904.07850
model {
center_net {
num_classes: 5
feature_extractor {
type: "hourglass_104"
bgr_ordering: true
channel_means: [104.01362025, 114.03422265, 119.9165958 ]
channel_stds: [73.6027665 , 69.89082075, 70.9150767 ]
}
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 512
max_dimension: 512
pad_to_max_dimension: true
}
}
object_detection_task {
task_loss_weight: 1.0
offset_loss_weight: 1.0
scale_loss_weight: 0.1
localization_loss {
l1_localization_loss {
}
}
}
object_center_params {
object_center_loss_weight: 1.0
min_box_overlap_iou: 0.7
max_box_predictions: 100
classification_loss {
penalty_reduced_logistic_focal_loss {
alpha: 2.0
beta: 4.0
}
}
}
keypoint_label_map_path: "annotations/label_map_cans_in_fridge.pbtxt"
keypoint_estimation_task {
task_name: "human_pose"
task_loss_weight: 1.0
loss {
localization_loss {
l1_localization_loss {
}
}
classification_loss {
penalty_reduced_logistic_focal_loss {
alpha: 2.0
beta: 4.0
}
}
}
keypoint_class_name: "item"
keypoint_label_to_std {
key: "left_ankle"
value: 0.89
}
keypoint_label_to_std {
key: "left_ear"
value: 0.35
}
keypoint_label_to_std {
key: "left_elbow"
value: 0.72
}
keypoint_label_to_std {
key: "left_eye"
value: 0.25
}
keypoint_label_to_std {
key: "left_hip"
value: 1.07
}
keypoint_label_to_std {
key: "left_knee"
value: 0.89
}
keypoint_label_to_std {
key: "left_shoulder"
value: 0.79
}
keypoint_label_to_std {
key: "left_wrist"
value: 0.62
}
keypoint_label_to_std {
key: "nose"
value: 0.26
}
keypoint_label_to_std {
key: "right_ankle"
value: 0.89
}
keypoint_label_to_std {
key: "right_ear"
value: 0.35
}
keypoint_label_to_std {
key: "right_elbow"
value: 0.72
}
keypoint_label_to_std {
key: "right_eye"
value: 0.25
}
keypoint_label_to_std {
key: "right_hip"
value: 1.07
}
keypoint_label_to_std {
key: "right_knee"
value: 0.89
}
keypoint_label_to_std {
key: "right_shoulder"
value: 0.79
}
keypoint_label_to_std {
key: "right_wrist"
value: 0.62
}
keypoint_regression_loss_weight: 0.1
keypoint_heatmap_loss_weight: 1.0
keypoint_offset_loss_weight: 1.0
offset_peak_radius: 3
per_keypoint_offset: true
}
}
}
train_config: {
batch_size: 8 # higher volume requires more memory, can change this
num_steps: 250000
data_augmentation_options {
random_horizontal_flip {
keypoint_flip_permutation: 0
keypoint_flip_permutation: 2
keypoint_flip_permutation: 1
keypoint_flip_permutation: 4
keypoint_flip_permutation: 3
keypoint_flip_permutation: 6
keypoint_flip_permutation: 5
keypoint_flip_permutation: 8
keypoint_flip_permutation: 7
keypoint_flip_permutation: 10
keypoint_flip_permutation: 9
keypoint_flip_permutation: 12
keypoint_flip_permutation: 11
keypoint_flip_permutation: 14
keypoint_flip_permutation: 13
keypoint_flip_permutation: 16
keypoint_flip_permutation: 15
}
}
data_augmentation_options {
random_crop_image {
min_aspect_ratio: 0.5
max_aspect_ratio: 1.7
random_coef: 0.25
}
}
data_augmentation_options {
random_adjust_hue {
}
}
data_augmentation_options {
random_adjust_contrast {
}
}
data_augmentation_options {
random_adjust_saturation {
}
}
data_augmentation_options {
random_adjust_brightness {
}
}
data_augmentation_options {
random_absolute_pad_image {
max_height_padding: 200
max_width_padding: 200
pad_color: [0, 0, 0]
}
}
optimizer {
adam_optimizer: {
epsilon: 1e-7 # Match tf.keras.optimizers.Adam's default.
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: 1e-3
total_steps: 250000
warmup_learning_rate: 2.5e-4
warmup_steps: 5000
}
}
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
fine_tune_checkpoint_version: V2
fine_tune_checkpoint: "pre-trained-models/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8/checkpoint/ckpt-0"
fine_tune_checkpoint_type: "detection"
}
train_input_reader: {
label_map_path: "annotations/label_map_cans_in_fridge.pbtxt"
tf_record_input_reader {
input_path: "annotations/train.record"
}
num_keypoints: 17
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
num_visualizations: 10
max_num_boxes_to_visualize: 20
min_score_threshold: 0.2
batch_size: 1;
parameterized_metric {
coco_keypoint_metrics {
class_label: "person"
keypoint_label_to_sigmas {
key: "nose"
value: 0.026
}
keypoint_label_to_sigmas {
key: "left_eye"
value: 0.025
}
keypoint_label_to_sigmas {
key: "right_eye"
value: 0.025
}
keypoint_label_to_sigmas {
key: "left_ear"
value: 0.035
}
keypoint_label_to_sigmas {
key: "right_ear"
value: 0.035
}
keypoint_label_to_sigmas {
key: "left_shoulder"
value: 0.079
}
keypoint_label_to_sigmas {
key: "right_shoulder"
value: 0.079
}
keypoint_label_to_sigmas {
key: "left_elbow"
value: 0.072
}
keypoint_label_to_sigmas {
key: "right_elbow"
value: 0.072
}
keypoint_label_to_sigmas {
key: "left_wrist"
value: 0.062
}
keypoint_label_to_sigmas {
key: "right_wrist"
value: 0.062
}
keypoint_label_to_sigmas {
key: "left_hip"
value: 0.107
}
keypoint_label_to_sigmas {
key: "right_hip"
value: 0.107
}
keypoint_label_to_sigmas {
key: "left_knee"
value: 0.087
}
keypoint_label_to_sigmas {
key: "right_knee"
value: 0.087
}
keypoint_label_to_sigmas {
key: "left_ankle"
value: 0.089
}
keypoint_label_to_sigmas {
key: "right_ankle"
value: 0.089
}
}
}
# Provide the edges to connect the keypoints. The setting is suitable for
# COCO's 17 human pose keypoints.
keypoint_edge { # nose-left eye
start: 0
end: 1
}
keypoint_edge { # nose-right eye
start: 0
end: 2
}
keypoint_edge { # left eye-left ear
start: 1
end: 3
}
keypoint_edge { # right eye-right ear
start: 2
end: 4
}
keypoint_edge { # nose-left shoulder
start: 0
end: 5
}
keypoint_edge { # nose-right shoulder
start: 0
end: 6
}
keypoint_edge { # left shoulder-left elbow
start: 5
end: 7
}
keypoint_edge { # left elbow-left wrist
start: 7
end: 9
}
keypoint_edge { # right shoulder-right elbow
start: 6
end: 8
}
keypoint_edge { # right elbow-right wrist
start: 8
end: 10
}
keypoint_edge { # left shoulder-right shoulder
start: 5
end: 6
}
keypoint_edge { # left shoulder-left hip
start: 5
end: 11
}
keypoint_edge { # right shoulder-right hip
start: 6
end: 12
}
keypoint_edge { # left hip-right hip
start: 11
end: 12
}
keypoint_edge { # left hip-left knee
start: 11
end: 13
}
keypoint_edge { # left knee-left ankle
start: 13
end: 15
}
keypoint_edge { # right hip-right knee
start: 12
end: 14
}
keypoint_edge { # right knee-right ankle
start: 14
end: 16
}
}
eval_input_reader: {
label_map_path: "annotations/label_map_cans_in_fridge.pbtxt"
tf_record_input_reader {
input_path: "annotations/test.record"
}
num_keypoints: 17
}
I checked the paths of all the directories and those are fine. The problem lies with the following line:
keypoint_class_name: "item"
The value 'item' I took from my labelimg.pbtxt file, which looks like this:
item {
id: 1
name: 'ColaCan'
}
item {
id: 2
name: 'FantaCan'
}
item {
id: 3
name: 'SpriteLemonCan'
}
item {
id: 4
name: 'Upperside'
}
item {
id: 5
name: 'VanishingLine'
}
Can someone tell me what I did wrong?
You've chosen a model with keypoint detection, so the pipeline.config will be looking for two label maps:
firstly, a label map for the objects you're trying to classify via bounding box, and secondly a keypoint label map.
The Object Detection API's models/research/object_detection/data folder has examples of such keypoint label maps. The keypoint_class_name should reference a class label inside the keypoint label map, e.g. perhaps 'Person'.
This should presumably also map to the keypoint labeling in your training set.
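For illustration, a keypoint label map entry has roughly this shape (a sketch based on the string_int_label_map proto; the class name and keypoint labels here are examples). Note that keypoint_class_name must match the item's name, 'Person' in this sketch, not the literal word 'item':
item {
  id: 1
  name: 'Person'
  keypoints {
    id: 0
    label: 'nose'
  }
  keypoints {
    id: 1
    label: 'left_eye'
  }
  ...
}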

Labels not showing correct name

I am pretty new to all of this, so bear with me.
I've made a program to recognise tools. The issue is that while it's running it will see an object, but the name shown is N/A. Note that this doesn't happen to every label (it doesn't recognise screwdrivers well yet, but when it thinks it sees one it does label it Screwdriver instead of N/A).
I've checked countless forums from people with this issue and I cannot find why this is happening.
I have 16 classes for the 16 objects, and the label map is in order and exactly as shown on multiple other sites.
I'm all out of ideas here.
Pipeline:
model {
ssd {
num_classes: 16
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 512
max_dimension: 512
pad_to_max_dimension: false
}
}
feature_extractor {
type: "ssd_efficientnet-b0_bifpn_keras"
conv_hyperparams {
regularizer {
l2_regularizer {
weight: 4e-05
}
}
initializer {
truncated_normal_initializer {
mean: 0.0
stddev: 0.03
}
}
activation: SWISH
batch_norm {
decay: 0.99
scale: true
epsilon: 0.001
}
force_use_bias: true
}
bifpn {
min_level: 3
max_level: 7
num_iterations: 3
num_filters: 64
}
}
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
conv_hyperparams {
regularizer {
l2_regularizer {
weight: 4e-05
}
}
initializer {
random_normal_initializer {
mean: 0.0
stddev: 0.01
}
}
activation: SWISH
batch_norm {
decay: 0.99
scale: true
epsilon: 0.001
}
force_use_bias: true
}
depth: 64
num_layers_before_predictor: 3
kernel_size: 3
class_prediction_bias_init: -4.6
use_depthwise: true
}
}
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
scales_per_octave: 3
}
}
post_processing {
batch_non_max_suppression {
score_threshold: 1e-08
iou_threshold: 0.5
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
normalize_loss_by_num_matches: true
loss {
localization_loss {
weighted_smooth_l1 {
}
}
classification_loss {
weighted_sigmoid_focal {
gamma: 1.5
alpha: 0.25
}
}
classification_weight: 1.0
localization_weight: 1.0
}
encode_background_as_zeros: true
normalize_loc_loss_by_codesize: true
inplace_batchnorm_update: true
freeze_batchnorm: false
add_background_class: false
}
}
train_config {
batch_size: 1
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_scale_crop_and_pad_to_square {
output_size: 512
scale_min: 0.1
scale_max: 2.0
}
}
sync_replicas: true
optimizer {
momentum_optimizer {
learning_rate {
cosine_decay_learning_rate {
learning_rate_base: 0.08
total_steps: 300000
warmup_learning_rate: 0.001
warmup_steps: 2500
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
fine_tune_checkpoint: "C:/Users/djust/Desktop/Object_detection/models/research/object_detection/efficientdet_d0_coco17_tpu-32/checkpoint/ckpt-0"
num_steps: 300000
startup_delay_steps: 0.0
replicas_to_aggregate: 8
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
fine_tune_checkpoint_type: "detection"
use_bfloat16: false
fine_tune_checkpoint_version: V2
}
train_input_reader {
label_map_path: "C:/Users/djust/Desktop/Object_detection/models/research/object_detection/training/labelmap.pbtxt"
tf_record_input_reader {
input_path: "C:/Users/djust/Desktop/Object_detection/models/research/object_detection/train.record"
}
}
eval_config {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
batch_size: 1
}
eval_input_reader {
label_map_path: "C:/Users/djust/Desktop/Object_detection/models/research/object_detection/training/labelmap.pbtxt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "C:/Users/djust/Desktop/Object_detection/models/research/object_detection/test.record"
}
}
Labelmap:
item {
display_name: 'person'
name: 'person'
id: 1
}
item {
display_name: 'crimping_tool'
name: 'crimping_tool'
id: 2
}
item {
display_name: 'drill_set'
name: 'drill_set'
id: 3
}
item {
display_name: 'utility_knife'
name: 'utility_knife'
id: 4
}
item {
display_name: 'screwdriver'
name: 'screwdriver'
id: 5
}
item {
display_name: 'stripping_pliers'
name: 'stripping_pliers'
id: 6
}
item {
display_name: 'cutting_pliers'
name: 'cutting_pliers'
id: 7
}
item {
display_name: 'stripping_tool'
name: 'stripping_tool'
id: 8
}
item {
display_name: 'pliers'
name: 'pliers'
id: 9
}
item {
display_name: 'pipewrench'
name: 'pipewrench'
id: 10
}
item {
display_name: 'measuring_tool'
name: 'measuring_tool'
id: 11
}
item {
display_name: 'cable_cutter_angled'
name: 'cable_cutter_angled'
id: 12
}
item {
display_name: 'stripping_tool_2'
name: 'stripping_tool_2'
id: 13
}
item {
display_name: 'wrench'
name: 'wrench'
id: 14
}
item {
display_name: 'hexkey_set'
name: 'hexkey_set'
id: 15
}
item {
display_name: 'drill_set_2'
name: 'drill_set_2'
id: 16
}
A possible cause could be that the label IDs in the TFRecords you use are not correct. Can you validate that, when converting your images and annotations to TFRecords, 'image/object/class/label' is set correctly?
'image/object/class/label':
dataset_util.int64_list_feature(category_ids)
I also noticed there is a display_name field in your labelmap file; I've never used display_name and I'm not sure whether it could also be a cause of your N/A labels.
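A quick way to inspect a few records (a sketch; the path follows the question's layout, and the feature keys assume the standard OD API TFRecord format):
import tensorflow as tf

path = 'C:/Users/djust/Desktop/Object_detection/models/research/object_detection/train.record'
for raw in tf.data.TFRecordDataset(path).take(5):
    example = tf.train.Example.FromString(raw.numpy())
    labels = example.features.feature['image/object/class/label'].int64_list.value
    texts = example.features.feature['image/object/class/text'].bytes_list.value
    # Label ids should be 1-based and match the label map ids; the text
    # entries should match the corresponding 'name' fields.
    print(list(labels), [t.decode() for t in texts])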
If the labels are correctly set in the TFRecord, then I would advise trying a labelmap file with the following structure:
item {
id: 1
name: 'person'
}
item {
id: 2
name: 'crimping_tool'
}
item {
id: 3
name: 'drill_set'
}
...

Disable augmentation in tensorflow training pipeline

I googled around a bit, but I only found questions about enabling data augmentation.
I followed this tutorial but with my own dataset (only one class). I had already performed data augmentation on my dataset offline, so I deleted the responsible lines from pipeline.config.
Now my pipeline looks like this:
model {
ssd {
num_classes: 1
image_resizer {
fixed_shape_resizer {
height: 640
width: 640
}
}
feature_extractor {
type: "ssd_resnet50_v1_fpn_keras"
depth_multiplier: 1.0
min_depth: 16
conv_hyperparams {
regularizer {
l2_regularizer {
weight: 0.00039999998989515007
}
}
initializer {
truncated_normal_initializer {
mean: 0.0
stddev: 0.029999999329447746
}
}
activation: RELU_6
batch_norm {
decay: 0.996999979019165
scale: true
epsilon: 0.0010000000474974513
}
}
override_base_feature_extractor_hyperparams: true
fpn {
min_level: 3
max_level: 7
}
}
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
conv_hyperparams {
regularizer {
l2_regularizer {
weight: 0.00039999998989515007
}
}
initializer {
random_normal_initializer {
mean: 0.0
stddev: 0.009999999776482582
}
}
activation: RELU_6
batch_norm {
decay: 0.996999979019165
scale: true
epsilon: 0.0010000000474974513
}
}
depth: 256
num_layers_before_predictor: 4
kernel_size: 3
class_prediction_bias_init: -4.599999904632568
}
}
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
scales_per_octave: 2
}
}
post_processing {
batch_non_max_suppression {
score_threshold: 9.99999993922529e-09
iou_threshold: 0.6000000238418579
max_detections_per_class: 100
max_total_detections: 100
use_static_shapes: false
}
score_converter: SIGMOID
}
normalize_loss_by_num_matches: true
loss {
localization_loss {
weighted_smooth_l1 {
}
}
classification_loss {
weighted_sigmoid_focal {
gamma: 2.0
alpha: 0.25
}
}
classification_weight: 1.0
localization_weight: 1.0
}
encode_background_as_zeros: true
normalize_loc_loss_by_codesize: true
inplace_batchnorm_update: true
freeze_batchnorm: false
}
}
train_config {
batch_size: 1
sync_replicas: true
optimizer {
momentum_optimizer {
learning_rate {
cosine_decay_learning_rate {
learning_rate_base: 0.03999999910593033
total_steps: 25000
warmup_learning_rate: 0.013333000242710114
warmup_steps: 2000
}
}
momentum_optimizer_value: 0.8999999761581421
}
use_moving_average: false
}
fine_tune_checkpoint: "/home/sally/work/training/TensorFlow/workspace/pre-trained-models/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8/checkpoint/ckpt-0"
num_steps: 25000
startup_delay_steps: 0.0
replicas_to_aggregate: 8
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
fine_tune_checkpoint_type: "detection"
use_bfloat16: false
fine_tune_checkpoint_version: V2
}
train_input_reader {
label_map_path: "/home/sally/work/training/TensorFlow/workspace/annotations/label_map.pbtxt"
tf_record_input_reader {
input_path: "/home/sally/work/training/TensorFlow/workspace/annotations/train.record"
}
}
eval_config {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
}
eval_input_reader {
label_map_path: "/home/sally/work/training/TensorFlow/workspace/annotations/label_map.pbtxt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "/home/sally/work/training/TensorFlow/workspace/annotations/test.record"
}
}
I started the training, but in TensorBoard I can see that the training images are very distorted.
For reference, normal images look like this.
As you can see, I'm trying to detect Kellogg's boxes. The dataset is generated using Blender (the soda can and fence are there to provide decoy objects and to partially cover the boxes).
Now my question: how do I disable any sort of data augmentation in the Object Detection API?
The mAP is very low because of these distorted images being used during training.
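In case it helps, clearing the blocks programmatically should be equivalent to deleting them by hand (a sketch, assuming the API's standard config_util helpers and a local pipeline.config):
from object_detection.utils import config_util

configs = config_util.get_configs_from_pipeline_file('pipeline.config')
# data_augmentation_options is a repeated field; clearing it removes
# all configured augmentations.
del configs['train_config'].data_augmentation_options[:]

pipeline_proto = config_util.create_pipeline_proto_from_configs(configs)
config_util.save_pipeline_config(pipeline_proto, '.')  # writes ./pipeline.config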
This is an issue with the normalization of the images for display; it does not affect your training.
However, if you want the images to be displayed correctly in TensorBoard, normalize them to the (0, 1) range. Check this link for some possible changes.
Note: normalizing to (-1, 1) has been reported to cause the same issue.
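As a minimal sketch of that fix, assuming you are writing your own image summaries (the log directory and image tensor are placeholders):
import tensorflow as tf

def to_unit_range(image):
    # Min-max rescale so TensorBoard's image panel renders it sensibly;
    # tf.summary.image expects float values in [0, 1].
    image = tf.cast(image, tf.float32)
    lo = tf.reduce_min(image)
    hi = tf.reduce_max(image)
    return (image - lo) / (hi - lo + 1e-8)

# Stand-in for a batch of training images (e.g. normalized to (-1, 1)).
images = tf.random.uniform([1, 640, 640, 3], minval=-1.0, maxval=1.0)

writer = tf.summary.create_file_writer('logs')  # hypothetical log dir
with writer.as_default():
    tf.summary.image('train_inputs', to_unit_range(images), step=0)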