I am training on a custom dataset, sample_data, where each image annotation is in the Pascal VOC XML format.
I am training the Mask R-CNN model from the matterport repo for TensorFlow 1.15, as well as a Mask R-CNN port for TensorFlow 2.2.0.
Config for the matterport model using tensorflow-cpu:
config
When running the model (with either version) on tensorflow-cpu, data generation is fast (almost instant) and training proceeds as expected, with proper loss values.
But when using tensorflow-gpu, model loading takes very long, epochs only start after another 7-10 minutes, and the loss generated is nan.
I've tried to:
lower the learning rate to 1e-5,
turn multiprocessing off,
set workers = 1,
change the optimizer to Adam.
System Specs:
i5-12400F,
12 GB RAM,
RTX 3060 12 GB,
all cuDNN and CUDA toolkit versions installed according to the TensorFlow documentation.
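For reference, here is a minimal sketch of two other settings I am considering; the memory-growth call is the TF 2.x API (on TF 1.15 the equivalent is a tf.ConfigProto with gpu_options.allow_growth=True), and GRADIENT_CLIP_NORM is an existing attribute of matterport's Config:
import tensorflow as tf

# Ask TensorFlow not to grab all 12 GB of GPU memory up front.
for gpu in tf.config.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)

# In the Config subclass further below, clamp gradients harder than matterport's
# default of 5.0, in case the nan loss comes from exploding gradients:
# GRADIENT_CLIP_NORM = 1.0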
Training Code :
from os import listdir
import imgaug
import numpy as np
from xml.etree import ElementTree
from numpy import zeros
from numpy import asarray
from mrcnn.utils import Dataset
from matplotlib import pyplot
from mrcnn.visualize import display_instances
from mrcnn.utils import extract_bboxes
from mrcnn.config import Config
from mrcnn.model import MaskRCNN
import mrcnn.model as mrmodel
import warnings
import tensorflow as tf
import time
warnings.filterwarnings('ignore')
# gpu_available = tf.config.list_physical_devices('GPU')
gpu_available = tf.test.is_gpu_available()
gpu_available
class CornDataset(Dataset):
# load the dataset definitions
def load_dataset(self, dataset_dir, is_train=True):
# start = time.perf_counter()
# define classes
self.add_class("dataset", 1, "fall-armyworm-larva")
self.add_class("dataset", 2, "fall-armyworm-larval-damage")
self.add_class("dataset", 3, "fall-armyworm-frass")
self.add_class("dataset", 4, "fall-armyworm-egg")
self.add_class("dataset", 5, "healthy-maize")
self.add_class("dataset", 6, "maize-streak-disease")
# define data locations
images_dir = dataset_dir + '/images/'
annotations_dir = dataset_dir + '/annots/'
# find all images
count = 1
for filename in listdir(images_dir):
print(filename)
# extract image id
image_id = filename[:-4]
name1 = ''
if filename[-4:] != 'jpeg':
name1 = filename[:-4]
else:
name1 = filename[:-5]
image_id = name1
            # skip all images with id >= 6770 when building the train set
if is_train and int(image_id) >= 6770:
continue
            # skip all images with id < 6770 when building the test/val set
if not is_train and int(image_id) < 6770:
continue
img_path = images_dir + filename
ann_path = annotations_dir + image_id + '.xml'
# add to dataset
self.add_image('dataset', image_id=image_id, path=img_path, annotation=ann_path, class_ids = [0,1,2,3,4,5,6])
# stop = time.perf_counter()
# print("time for load_dataset",(stop-start))
# extract bounding boxes from an annotation file
def extract_boxes(self, filename):
# start = time.perf_counter()
# load and parse the file
tree = ElementTree.parse(filename)
# get the root of the document
root = tree.getroot()
# extract each bounding box
boxes = list()
for box in root.findall('.//object'):
name = box.find('name').text #Add label name to the box list
xmin = int(box.find('./bndbox/xmin').text)
ymin = int(box.find('./bndbox/ymin').text)
xmax = int(box.find('./bndbox/xmax').text)
ymax = int(box.find('./bndbox/ymax').text)
coors = [xmin, ymin, xmax, ymax, name]
boxes.append(coors)
# extract image dimensions
width = int(root.find('.//size/width').text)
height = int(root.find('.//size/height').text)
# stop = time.perf_counter()
# print("time for extract_boxes",(stop-start))
return boxes, width, height
# load the masks for an image
def load_mask(self, image_id):
# start = time.perf_counter()
# get details of image
info = self.image_info[image_id]
# define box file location
path = info['annotation']
#return info, path
# load XML
boxes, w, h = self.extract_boxes(path)
# create one array for all masks, each on a different channel
masks = zeros([h, w, len(boxes)], dtype='uint8')
# create masks
class_ids = list()
for i in range(len(boxes)):
box = boxes[i]
row_s, row_e = box[1], box[3]
col_s, col_e = box[0], box[2]
# box[4] will have the name of the class
if box[4]=='fall-armyworm-larva':
masks[row_s:row_e, col_s:col_e, i] = 1
class_ids.append(self.class_names.index('fall-armyworm-larva'))
elif box[4]=='fall-armyworm-larval-damage':
masks[row_s:row_e, col_s:col_e, i] = 2
class_ids.append(self.class_names.index('fall-armyworm-larval-damage'))
elif box[4]=='fall-armyworm-frass':
masks[row_s:row_e, col_s:col_e, i] = 3
class_ids.append(self.class_names.index('fall-armyworm-frass'))
elif box[4]=='fall-armyworm-egg':
masks[row_s:row_e, col_s:col_e, i] = 4
class_ids.append(self.class_names.index('fall-armyworm-egg'))
            elif box[4]=='healthy-maize' or box[4]=='healthy-images' or box[4]=='none-healthy':
masks[row_s:row_e, col_s:col_e, i] = 5
class_ids.append(self.class_names.index('healthy-maize'))
elif box[4]=='maize-streak-disease':
masks[row_s:row_e, col_s:col_e, i] = 6
class_ids.append(self.class_names.index('maize-streak-disease'))
# stop = time.perf_counter()
# print("time for load_mask",(stop-start))
return masks, asarray(class_ids, dtype='int32')
# load an image reference
def image_reference(self, image_id):
info = self.image_info[image_id]
return info['path']
dataset_dir='final_dataset/'
validset_dir = 'validation/'
train_set = CornDataset()
train_set.load_dataset(dataset_dir, is_train=True)
train_set.prepare()
print('Train: %d' % len(train_set.image_ids))
# test/val set
test_set = CornDataset()
test_set.load_dataset(dataset_dir, is_train=False)
test_set.prepare()
print('Test: %d' % len(test_set.image_ids))
import random
num = random.randint(0, len(train_set.image_ids) - 1)  # randint is inclusive on both ends, so subtract 1 to stay in range
# define image id
image_id = num
# load the image
image = train_set.load_image(image_id)
# load the masks and the class ids
mask, class_ids = train_set.load_mask(image_id)
# extract bounding boxes from the masks
bbox = extract_bboxes(mask)
# display image with masks and bounding boxes
display_instances(image, bbox, mask, class_ids, train_set.class_names)
class CornConfig(Config):
# define the name of the configuration
NAME = "corn_cfg"
# number of classes (background + 5 Diseases + 1 Healthy)
NUM_CLASSES = 1 + 6
IMAGES_PER_GPU = 1
# number of training steps per epoch
STEPS_PER_EPOCH = 100
VALIDATION_STEPS = 50
# Skip detections with < 90% confidence
# DETECTION_MIN_CONFIDENCE = 0.8
LEARNING_RATE = 1e-4
# BATCH_SIZE = 28
# prepare config
config = CornConfig()
config.display()
import os
ROOT_DIR = "/home/mehathab/Desktop/maskrcnn_drY-run"
# Directory to save logs and trained model
DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")
# define the model
model = MaskRCNN(mode='training', model_dir=DEFAULT_LOGS_DIR, config=config)
model_inference = MaskRCNN(mode="inference", config=config, model_dir=DEFAULT_LOGS_DIR)
# load weights (mscoco) and exclude the output layers
WEIGHT_PATH = 'mask_rcnn_coco.h5'
model.load_weights(WEIGHT_PATH, by_name=True,
exclude=["mrcnn_class_logits", "mrcnn_bbox_fc", "mrcnn_bbox", "mrcnn_mask"])
# train weights (output layers or 'heads')
# history = model.train(train_set, test_set, learning_rate=config.LEARNING_RATE, epochs=100, layers='3+')
mean_average_precision_callback = mrmodel.MeanAveragePrecisionCallback(model,
model_inference,
test_set,
calculate_map_at_every_X_epoch=5,
verbose=1)
model.train(train_set,test_set,
learning_rate=config.LEARNING_RATE,
epochs=100,
layers='heads',
custom_callbacks=[mean_average_precision_callback])
I have been working with Swin Transformer attention maps. Below is my code implementation:
from PIL import Image
import sys
import torch
from torchvision import transforms
import numpy as np
import cv2
def rollout(attentions, discard_ratio, head_fusion):
result = torch.eye(attentions[0].size(-1))
with torch.no_grad():
for attention in attentions:
# print(attentions)
if head_fusion == "mean":
attention_heads_fused = attention.mean(axis=1)
elif head_fusion == "max":
attention_heads_fused = attention.max(axis=1)[0]
elif head_fusion == "min":
attention_heads_fused = attention.min(axis=1)[0]
else:
raise "Attention head fusion type Not supported"
# Drop the lowest attentions, but
# don't drop the class token
flat = attention_heads_fused.view(attention_heads_fused.size(0), -1)
# print(flat)
_, indices = flat.topk(int(flat.size(-1)*discard_ratio), -1, False)
# print("_ : ",_," indices : ",indices)
indices = indices[indices != 0]
flat[0, indices] = 0
I = torch.eye(attention_heads_fused.size(-1))
# print("I : ",I)
a = (attention_heads_fused + 1.0*I)/2
# print("a : ",a)
# print(a.size())
print(a.sum(dim=-1))
a = a / a.sum(dim=-1)
result = torch.matmul(a, result)
# print("result : ",result)
# Look at the total attention between the class token,
# and the image patches
mask = result[0, 0 , 1 :]
# In case of 224x224 image, this brings us from 196 to 14
width = int(mask.size(-1)**0.5)
mask = mask.reshape(width, width).numpy()
mask = mask / np.max(mask)
return mask
class VITAttentionRollout:
def __init__(self, model, attention_layer_name='dropout', head_fusion="mean",
discard_ratio=0.9):
self.model = model
self.head_fusion = head_fusion
self.discard_ratio = discard_ratio
# print(self.model.named_modules())
for name, module in self.model.named_modules():
# print("Name : ",name," Module : ",module)
if attention_layer_name in name:
module.register_forward_hook(self.get_attention)
# print(self.attentions)
self.attentions = []
def get_attention(self, module, input, output):
self.attentions.append(output.cpu())
def __call__(self, input_tensor):
self.attentions = []
with torch.no_grad():
output = self.model(**input_tensor)
# print(output)
return rollout(self.attentions, self.discard_ratio, self.head_fusion)
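For reference, since this class hooks modules by matching attention_layer_name against named_modules(), here is a quick (hypothetical) inspection snippet to see which attention-related module names the model actually exposes; it assumes the model variable used in the main program below:
# Hypothetical inspection snippet: list attention-related submodules of the
# Swin model so the hook target name can be chosen correctly.
for name, module in model.named_modules():
    if 'attn' in name or 'attention' in name:
        print(name, type(module).__name__)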
This is the main program
import sys
import torch
from PIL import Image
from torchvision import transforms
import numpy as np
import cv2
from google.colab.patches import cv2_imshow
# from vit_rollout import VITAttentionRollout
from vit_grad_rollout import VITAttentionGradRollout
def show_mask_on_image(img, mask):
img = np.float32(img) / 255
heatmap = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET)
heatmap = np.float32(heatmap) / 255
cam = heatmap + np.float32(img)
cam = cam / np.max(cam)
return np.uint8(255 * cam)
if __name__ == '__main__':
model.eval()
image_path = '/content/both.jpg'
category_index = None
head_fusion = 'max'
discard_ratio = 0.9
transform = transforms.Compose([
transforms.Resize((224, 224)),
transforms.ToTensor(),
transforms.Normalize(mean=[0.5,0.5,0.5], std=[0.5,0.5,0.5]),
])
img = Image.open(image_path)
img = img.resize((224, 224))
input_tensor = feature_extractor(img, return_tensors="pt")
#print(input_tensor)
if category_index is None:
print("Doing Attention Rollout")
attention_rollout = VITAttentionRollout(model, head_fusion=head_fusion,
discard_ratio=discard_ratio)
mask = attention_rollout(input_tensor)
name = "attention_rollout_{:.3f}_{}.png".format(discard_ratio, head_fusion)
else:
print("Doing Gradient Attention Rollout")
grad_rollout = VITAttentionGradRollout(model, discard_ratio=discard_ratio)
mask = grad_rollout(input_tensor, category_index)
name = "grad_rollout_{}_{:.3f}_{}.png".format(category_index,
discard_ratio, head_fusion)
np_img = np.array(img)[:, :, ::-1]
mask = cv2.resize(mask, (np_img.shape[1], np_img.shape[0]))
mask = show_mask_on_image(np_img, mask)
cv2_imshow(np_img)
cv2_imshow(mask)
cv2.imwrite("input.jpg",np_img)
cv2.imwrite(name, mask)
cv2.waitKey(-1)
I am referring to the git project https://github.com/jacobgil/vit-explain
But I am getting the error RuntimeError: The size of tensor a (49) must match the size of tensor b (64) at non-singleton dimension 1
I researched some git projects, but there is very little information on Swin Transformers. So is there any way that I can make an attention map for Swin Transformer models?
Please help with it.
Thanks in advance.
I have installed TensorFlow 1.15 and created a custom model. I converted it into a .tflite file so TensorFlow Lite can read it. Then I ran the following code:
import os
import argparse
import cv2
import numpy as np
import sys
import glob
import importlib.util
parser = argparse.ArgumentParser()
parser.add_argument('--modeldir', help='Folder the .tflite file is located in', required=True)
parser.add_argument('--graph', help='Name of the .tflite file, if different than detect.tflite', default='detect.tflite')
parser.add_argument('--labels', help='Name of the labelmap file, if different than labelmap.txt', default='labelmap.txt')
parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects', default=0.5)
parser.add_argument('--image', help='Name of the single image to perform detection on. To run detection on multiple images, use --imagedir', default=None)
parser.add_argument('--imagedir', help='Name of the folder containing images to perform detection on. Folder must contain only images.', default=None)
parser.add_argument('--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection', action='store_true')
args = parser.parse_args()
MODEL_NAME = args.modeldir
GRAPH_NAME = args.graph
LABELMAP_NAME = args.labels
min_conf_threshold = float(args.threshold)
use_TPU = args.edgetpu
IM_NAME = args.image
IM_DIR = args.imagedir
if (IM_NAME and IM_DIR):
print('Error! Please only use the --image argument or the --imagedir argument, not both. Issue "python TFLite_detection_image.py -h" for help.')
sys.exit()
if (not IM_NAME and not IM_DIR):
IM_NAME = 'test1.jpg'
pkg = importlib.util.find_spec('tflite_runtime')
if pkg:
from tflite_runtime.interpreter import Interpreter
if use_TPU:
from tflite_runtime.interpreter import load_delegate
else:
from tensorflow.lite.python.interpreter import Interpreter
if use_TPU:
from tensorflow.lite.python.interpreter import load_delegate
if use_TPU:
if (GRAPH_NAME == 'detect.tflite'):
GRAPH_NAME = 'edgetpu.tflite'
CWD_PATH = os.getcwd()
if IM_DIR:
PATH_TO_IMAGES = os.path.join(CWD_PATH,IM_DIR)
images = glob.glob(PATH_TO_IMAGES + '/*')
elif IM_NAME:
PATH_TO_IMAGES = os.path.join(CWD_PATH,IM_NAME)
images = glob.glob(PATH_TO_IMAGES)
PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,GRAPH_NAME)
PATH_TO_LABELS = os.path.join(CWD_PATH,MODEL_NAME,LABELMAP_NAME)
with open(PATH_TO_LABELS, 'r') as f:
labels = [line.strip() for line in f.readlines()]
if labels[0] == '???':
del(labels[0])
if use_TPU:
interpreter = Interpreter(model_path=PATH_TO_CKPT, experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
print(PATH_TO_CKPT)
else:
interpreter = Interpreter(model_path=PATH_TO_CKPT)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]
floating_model = (input_details[0]['dtype'] == np.float32)
input_mean = 127.5
input_std = 127.5
for image_path in images:
image = cv2.imread(image_path)
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
imH, imW, _ = image.shape
image_resized = cv2.resize(image_rgb, (width, height))
input_data = np.expand_dims(image_resized, axis=0)
if floating_model:
input_data = (np.float32(input_data) - input_mean) / input_std
interpreter.set_tensor(input_details[0]['index'],input_data)
interpreter.invoke()
boxes = interpreter.get_tensor(output_details[0]['index'])[0] # Bounding box coordinates of detected objects
classes = interpreter.get_tensor(output_details[1]['index'])[0] # Class index of detected objects
scores = interpreter.get_tensor(output_details[2]['index'])[0] # Confidence of detected objects
for i in range(len(scores)):
if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):
ymin = int(max(1,(boxes[i][0] * imH)))
xmin = int(max(1,(boxes[i][1] * imW)))
ymax = int(min(imH,(boxes[i][2] * imH)))
xmax = int(min(imW,(boxes[i][3] * imW)))
cv2.rectangle(image, (xmin,ymin), (xmax,ymax), (10, 255, 0), 2)
object_name = labels[int(classes[i])] # Look up object name from "labels" array using class index
label = '%s: %d%%' % (object_name, int(scores[i]*100)) # Example: 'person: 72%'
labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size
label_ymin = max(ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window
cv2.rectangle(image, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in
cv2.putText(image, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)
cv2.imshow('Object detector', image)
if cv2.waitKey(0) == ord('q'):
break
cv2.destroyAllWindows()
Now, my custom model seems to work. It locates the items in the image correctly, but it labels everything with the first item in labelmap.txt. For example:
labelmap.txt:
key
remote
The model identifies the remotes in the images but labels them as "key" because that is the first entry in labelmap.txt. I don't know why this is happening; can someone please help me? I am sorry if anything is unclear. Please let me know and I will try my best to clarify. Thank you.
I followed https://github.com/EdjeElectronics/TensorFlow-Lite-Object-Detection-on-Android-and-Raspberry-Pi.
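For reference, here is a small diagnostic sketch (not part of my original script) that I could add right after interpreter.invoke() to check whether the raw class indices are all coming back as 0 or whether the label lookup itself is wrong; it reuses the variables from the code above:
# Diagnostic sketch: print raw class indices next to the looked-up labels.
classes = interpreter.get_tensor(output_details[1]['index'])[0]
scores = interpreter.get_tensor(output_details[2]['index'])[0]
for i in range(len(scores)):
    if scores[i] > min_conf_threshold:
        print('raw class index:', classes[i], '-> label:', labels[int(classes[i])])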
I'm currently training an object detection model using TensorFlow and I ran into a problem: I don't have enough samples to train my model effectively, and it will take me a long time to get more. Would it be a good idea to create the remaining samples using Photoshop, or will I run into issues with this approach?
You have many options:
imgaug
albumentations (a minimal sketch follows this list)
Augmentor
OpenCV:
Image-Augmentation-Using-OpenCV-and-Python-Github-Repo
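For the albumentations option, here is a minimal sketch that keeps Pascal-VOC-style bounding boxes in sync with the image; the file name, box coordinates, and label are made-up placeholders:
import albumentations as A
import cv2

# Placeholder image plus one Pascal VOC box [xmin, ymin, xmax, ymax] and its label.
image = cv2.imread('sample.jpg')
bboxes = [[50, 60, 200, 220]]
class_labels = ['object']

transform = A.Compose(
    [
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.3),
        A.Rotate(limit=15, p=0.5),
    ],
    bbox_params=A.BboxParams(format='pascal_voc', label_fields=['class_labels']),
)

out = transform(image=image, bboxes=bboxes, class_labels=class_labels)
aug_image, aug_bboxes = out['image'], out['bboxes']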
Example OpenCV code I have used before:
import numpy as np
import cv2 as cv
import imutils
def data_augmentation(img, min_rot_angle=-180, max_rot_angle=180, crop_ratio=0.2, smooth_size=3, sharp_val=3, max_noise_scale=10):
(H, W) = img.shape[:2]
img_a = img
all_func = ['flip', 'rotate', 'crop', 'smooth', 'sharp', 'noise']
do_func = np.random.choice(all_func, size=np.random.randint(1, len(all_func)), replace=False)
#do_func = ['crop']
    # Flip image, 0: vertically, 1: horizontally
if 'flip' in do_func:
img_a = cv.flip(img_a, np.random.choice([0, 1]))
# Rotate image
if 'rotate' in do_func:
rot_ang = np.random.uniform(min_rot_angle, max_rot_angle)
img_a = imutils.rotate_bound(img_a, rot_ang)
# Crop image
if 'crop' in do_func:
(H_A, W_A) = img_a.shape[:2]
start_x = np.random.randint(0, int(H_A * crop_ratio))
start_y = np.random.randint(0, int(W_A * crop_ratio))
end_x = np.random.randint(int(H_A * (1-crop_ratio)), H_A)
end_y = np.random.randint(int(W_A * (1-crop_ratio)), W_A)
img_a = img_a[start_x:end_x, start_y:end_y]
# Smoothing
if 'smooth' in do_func:
img_a = cv.GaussianBlur(img_a, (smooth_size, smooth_size), 0)
# Sharpening
if 'sharp' in do_func:
de_sharp_val = -(sharp_val - 1) / 8
kernel = np.array([[de_sharp_val]*3, [de_sharp_val, sharp_val, de_sharp_val], [de_sharp_val]*3])
img_a = cv.filter2D(img_a, -1, kernel)
# Add the Gaussian noise to the image
if 'noise' in do_func:
noise_scale = np.random.uniform(0, max_noise_scale)
gauss = np.random.normal(0, noise_scale, img_a.size)
gauss = np.float32(gauss.reshape(img_a.shape[0],img_a.shape[1],img_a.shape[2]))
img_a = cv.add(img_a,gauss)
# Keep shape
img_a = cv.resize(img_a, (W, H))
return np.float32(img_a)
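A minimal usage example for the function above (assuming some local image such as 'sample.jpg'):
img = cv.imread('sample.jpg')                  # any BGR image
augmented = data_augmentation(img)             # random subset of flip/rotate/crop/smooth/sharp/noise
cv.imwrite('sample_aug.jpg', np.clip(augmented, 0, 255).astype(np.uint8))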
Others:
You can also do data augmentation with just TensorFlow! More in this blog: Data Augmentation in Python: Everything You Need to Know
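For example, a minimal sketch using only tf.image ops (TF 2.x; the file name is a placeholder):
import tensorflow as tf

def tf_augment(image):
    # image: a 3-D float image tensor in [0, 1]
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_brightness(image, max_delta=0.2)
    image = tf.image.random_contrast(image, lower=0.8, upper=1.2)
    return image

raw = tf.io.decode_jpeg(tf.io.read_file('sample.jpg'), channels=3)
augmented = tf_augment(tf.image.convert_image_dtype(raw, tf.float32))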
I resorted to using the cloud training workflow. Given the model it produced, I would have expected to drop it directly into the code I have that works with other tflite models, but the cloud-produced model doesn't work: I get "index out of range" when asking for interpreter.get_tensor parameters.
Here is my code, basically a modified example, where I can ingest a video and produce a video with results.
import os  # needed for os.getcwd() and os.path.join below
import argparse
import cv2
import numpy as np
import sys
import importlib.util
# Define and parse input arguments
parser = argparse.ArgumentParser()
parser.add_argument('--modeldir', help='Folder the .tflite file is located in',
required=True)
parser.add_argument('--graph', help='Name of the .tflite file, if different than detect.tflite',
default='model.tflite')
# default='/tmp/detect.tflite')
parser.add_argument('--labels', help='Name of the labelmap file, if different than labelmap.txt',
default='dict.txt')
# default='/tmp/coco_labels.txt')
parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects',
default=0.5)
parser.add_argument('--video', help='Name of the video file',
default='test.mp4')
parser.add_argument('--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection',
action='store_true')
args = parser.parse_args()
MODEL_NAME = args.modeldir
GRAPH_NAME = args.graph
LABELMAP_NAME = args.labels
VIDEO_NAME = args.video
min_conf_threshold = float(args.threshold)
use_TPU = args.edgetpu
# Import TensorFlow libraries
# If tensorflow is not installed, import interpreter from tflite_runtime, else import from regular tensorflow
# If using Coral Edge TPU, import the load_delegate library
pkg = importlib.util.find_spec('tensorflow')
pkg = True  # force the full TensorFlow import path below
if pkg is None:
from tflite_runtime.interpreter import Interpreter
if use_TPU:
from tflite_runtime.interpreter import load_delegate
else:
from tensorflow.lite.python.interpreter import Interpreter
if use_TPU:
from tensorflow.lite.python.interpreter import load_delegate
# If using Edge TPU, assign filename for Edge TPU model
if use_TPU:
# If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite'
if (GRAPH_NAME == 'detect.tflite'):
GRAPH_NAME = 'edgetpu.tflite'
# Get path to current working directory
CWD_PATH = os.getcwd()
# Path to video file
VIDEO_PATH = os.path.join(CWD_PATH,VIDEO_NAME)
# Path to .tflite file, which contains the model that is used for object detection
PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,GRAPH_NAME)
# Path to label map file
PATH_TO_LABELS = os.path.join(CWD_PATH,MODEL_NAME,LABELMAP_NAME)
# Load the label map
with open(PATH_TO_LABELS, 'r') as f:
labels = [line.strip() for line in f.readlines()]
# Have to do a weird fix for label map if using the COCO "starter model" from
# https://www.tensorflow.org/lite/models/object_detection/overview
# First label is '???', which has to be removed.
if labels[0] == '???':
del(labels[0])
# Load the Tensorflow Lite model.
# If using Edge TPU, use special load_delegate argument
if use_TPU:
interpreter = Interpreter(model_path=PATH_TO_CKPT,
experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
print(PATH_TO_CKPT)
else:
interpreter = Interpreter(model_path=PATH_TO_CKPT)
interpreter.allocate_tensors()
# Get model details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]
floating_model = (input_details[0]['dtype'] == np.float32)
input_mean = 127.5
input_std = 127.5
# Open video file
video = cv2.VideoCapture(VIDEO_PATH)
imW = video.get(cv2.CAP_PROP_FRAME_WIDTH)
imH = video.get(cv2.CAP_PROP_FRAME_HEIGHT)
out = cv2.VideoWriter('output.avi', cv2.VideoWriter_fourcc(
'M', 'J', 'P', 'G'), 10, (1920, 1080))
while(video.isOpened()):
# Acquire frame and resize to expected shape [1xHxWx3]
ret, frame = video.read()
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame_resized = cv2.resize(frame_rgb, (width, height))
input_data = np.expand_dims(frame_resized, axis=0)
# Normalize pixel values if using a floating model (i.e. if model is non-quantized)
if floating_model:
input_data = (np.float32(input_data) - input_mean) / input_std
# Perform the actual detection by running the model with the image as input
interpreter.set_tensor(input_details[0]['index'],input_data)
interpreter.invoke()
# Retrieve detection results
boxes = interpreter.get_tensor(output_details[0]['index'])[0] # Bounding box coordinates of detected objects
classes = interpreter.get_tensor(output_details[1]['index'])[0] # Class index of detected objects
scores = interpreter.get_tensor(output_details[2]['index'])[0] # Confidence of detected objects
print (boxes)
print (classes)
print (scores)
#num = interpreter.get_tensor(output_details[3]['index'])[0] # Total number of detected objects (inaccurate and not needed)
# Loop over all detections and draw detection box if confidence is above minimum threshold
for i in range(len(scores)):
if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):
# Get bounding box coordinates and draw box
# Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
ymin = int(max(1,(boxes[i][0] * imH)))
xmin = int(max(1,(boxes[i][1] * imW)))
ymax = int(min(imH,(boxes[i][2] * imH)))
xmax = int(min(imW,(boxes[i][3] * imW)))
cv2.rectangle(frame, (xmin,ymin), (xmax,ymax), (10, 255, 0), 4)
# Draw label
object_name = labels[int(classes[i])] # Look up object name from "labels" array using class index
label = '%s: %d%%' % (object_name, int(scores[i]*100)) # Example: 'person: 72%'
labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size
label_ymin = max(ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window
cv2.rectangle(frame, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0],
label_ymin+baseLine-10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in
cv2.putText(frame, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX,
0.7, (0, 0, 0), 2) # Draw label text
# All the results have been drawn on the frame, so it's time to display it.
cv2.imshow('Object detector', frame)
#output_rgb = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
out.write(frame)
# Press 'q' to quit
if cv2.waitKey(1) == ord('q'):
break
# Clean up
video.release()
out.release()
cv2.destroyAllWindows()
Here is what the print statements should look like when using the canned tflite model:
[32. 76. 56. 76. 0. 61. 74. 0. 0. 0.]
[0.609375 0.48828125 0.44921875 0.44921875 0.4140625 0.40234375
0.37890625 0.3125 0.3125 0.3125 ]
[[-0.01923192 0.17330796 0.747546 0.8384144 ]
[ 0.01866053 0.5023282 0.39603746 0.6143299 ]
[ 0.01673795 0.47382414 0.34407628 0.5580931 ]
[ 0.11588445 0.78543806 0.8778869 1.0039229 ]
[ 0.8106107 0.70675755 1.0080075 0.89248717]
[ 0.84941524 0.06391776 1.0006479 0.28792098]
[ 0.05543692 0.53557926 0.40413857 0.62823087]
[ 0.07051808 -0.00938512 0.8822515 0.28100258]
[ 0.68205094 0.33990026 0.9940187 0.6020821 ]
[ 0.08010477 0.01998334 0.6011186 0.26135433]]
Here is the error when presented with the cloud created model:
File "tflite_vid.py", line 124, in <module>
classes = interpreter.get_tensor(output_details[1]['index'])[0] # Class index of detected objects
IndexError: list index out of range
So I would kindly ask that someone explain how to either develop a TFLite model with TF2 in Python, or how to get the cloud to generate a usable TFLite model. Please, oh please, do not point me in a direction that entails wandering through Internet examples unless they are the actual gospel on how to do this.
In output_details[1], the [1] is what triggers "list index out of range": your model may have only 1 output, but the code tries to access a 2nd output.
For more on the Python usage, please refer to https://www.tensorflow.org/lite/guide/inference#load_and_run_a_model_in_python for guidance.
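A quick way to confirm this is to print the output details before indexing into them; a minimal sketch, assuming the interpreter from your script has already been built and allocate_tensors() has run:
output_details = interpreter.get_output_details()
print('number of outputs:', len(output_details))
for d in output_details:
    # each entry is a dict describing one output tensor
    print(d['index'], d['name'], d['shape'], d['dtype'])
If only one entry is printed, the model exposes a single output tensor, and the boxes/classes/scores indexing from the detection example will need to change accordingly.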
I am learning how to use TensorFlow; the first tutorial's code on the Mandelbrot set is below.
# Import libraries for simulation
import tensorflow as tf
import numpy as np
# Imports for visualization
import PIL.Image
from io import BytesIO
from IPython.display import Image, display
def DisplayFractal(a, fmt='jpeg'):
"""Display an array of iteration counts as a
colorful picture of a fractal."""
a_cyclic = (6.28*a/20.0).reshape(list(a.shape)+[1])
img = np.concatenate([10+20*np.cos(a_cyclic),
30+50*np.sin(a_cyclic),
155-80*np.cos(a_cyclic)], 2)
img[a==a.max()] = 0
a = img
a = np.uint8(np.clip(a, 0, 255))
f = BytesIO()
PIL.Image.fromarray(a).save(f, fmt)
display(Image(data=f.getvalue()))
sess = tf.InteractiveSession()
# Use NumPy to create a 2D array of complex numbers
Y, X = np.mgrid[-1.3:1.3:0.005, -2:1:0.005]
Z = X+1j*Y
xs = tf.constant(Z.astype(np.complex64))
zs = tf.Variable(xs)
ns = tf.Variable(tf.zeros_like(xs, tf.float32))
tf.global_variables_initializer().run()
# Compute the new values of z: z^2 + x
zs_ = zs*zs + xs
# Have we diverged with this new value?
not_diverged = tf.abs(zs_) < 4
# Operation to update the zs and the iteration count.
#
# Note: We keep computing zs after they diverge! This
# is very wasteful! There are better, if a little
# less simple, ways to do this.
#
step = tf.group(
zs.assign(zs_),
ns.assign_add(tf.cast(not_diverged, tf.float32))
)
for i in range(200): step.run()
DisplayFractal(ns.eval())
Running this returns the following in the shell:
<IPython.core.display.Image at 0x7fcdee1da810>
It doesn't display the image, and I'd prefer to save the image anyway.
How can I save the result as an image?
Scipy has an easy image save function! https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.misc.imsave.html
You should try this:
import scipy.misc
scipy.misc.imsave('mandelbrot.png',ns.eval())
I hope this works! Regardless, let me know!
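Note that scipy.misc.imsave was deprecated in SciPy 1.0 and removed in newer releases, so if the import fails, a Pillow-based equivalent (assuming the ns variable from your code) is:
import numpy as np
from PIL import Image

a = ns.eval()                                                   # float32 iteration counts
a = (255 * (a - a.min()) / (a.ptp() + 1e-9)).astype(np.uint8)   # scale to 0..255
Image.fromarray(a).save('mandelbrot.png')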