Images and masks visualisation - Colab - numpy

Hi everyone.
I have set up a visual sanity check for images and masks in Colab using matplotlib, NumPy and random. I expected it to display the matching pair (image and mask) for the same index, but unfortunately it doesn't.
For whatever reason, the images don't correspond, even though both datasets have the same size and the files share the same numbers/names.
Does anyone have a hint on how to fix this? Thank you in advance!
import random
import numpy as np
import matplotlib.pyplot as plt

# randrange(len(...)) excludes the upper bound; randint(0, len(...)) could
# return len(image_dataset) itself and raise an IndexError
image_number = random.randrange(len(image_dataset))

plt.figure(figsize=(12, 6))
plt.subplot(121)
plt.imshow(np.reshape(image_dataset[image_number], (patch_size, patch_size, 3)))
plt.subplot(122)
plt.imshow(np.reshape(mask_dataset[image_number], (patch_size, patch_size, 3)))
plt.show()
[Screenshot: the mismatched image/mask pair printed by the code above]
This is how I'm importing the training images (I do the same for the masks):
import os
import cv2
import numpy as np
from PIL import Image

image_dataset = []  # train images (the mask loader is identical)
for path, subdirs, files in os.walk(root_directory):
    dirname = path.split(os.path.sep)[-1]
    if dirname == "images":
        images = os.listdir(path)
        for i, image_name in enumerate(images):
            if image_name.endswith('.png'):
                image = cv2.imread(path + "/" + image_name, 1)  # read as BGR
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                # crop to the largest multiple of patch_size in each dimension
                SIZE_X = (image.shape[1] // patch_size) * patch_size
                SIZE_Y = (image.shape[0] // patch_size) * patch_size
                image = Image.fromarray(image)
                image = image.crop((0, 0, SIZE_X, SIZE_Y))
                image = np.array(image)
                image_dataset.append(image)

Try sorting the data. os.listdir() returns files in arbitrary order, so the image and mask folders can be listed in different orders; sort the listing in both loaders so matching indices line up:
images = sorted(os.listdir(path))
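
A minimal sketch of why sorting aligns the two datasets (images_path and masks_path are hypothetical stand-ins for the folders os.walk visits in the loader above):

import os

# hypothetical parallel folders holding identically named files
images_path = "root_directory/images"
masks_path = "root_directory/masks"

image_names = sorted(os.listdir(images_path))
mask_names = sorted(os.listdir(masks_path))

# after sorting, index i refers to the same scene in both lists,
# so image_dataset[i] and mask_dataset[i] match
assert image_names == mask_names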


Tensorflow lite only using the first item in the labelmap.txt file when identifying items

I installed TensorFlow 1.15 and trained a custom model, then converted it to a .tflite file so TensorFlow Lite can read it. Then I ran the following code:
import os
import argparse
import cv2
import numpy as np
import sys
import glob
import importlib.util
parser = argparse.ArgumentParser()
parser.add_argument('--modeldir', help='Folder the .tflite file is located in', required=True)
parser.add_argument('--graph', help='Name of the .tflite file, if different than detect.tflite', default='detect.tflite')
parser.add_argument('--labels', help='Name of the labelmap file, if different than labelmap.txt', default='labelmap.txt')
parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects', default=0.5)
parser.add_argument('--image', help='Name of the single image to perform detection on. To run detection on multiple images, use --imagedir', default=None)
parser.add_argument('--imagedir', help='Name of the folder containing images to perform detection on. Folder must contain only images.', default=None)
parser.add_argument('--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection', action='store_true')
args = parser.parse_args()

MODEL_NAME = args.modeldir
GRAPH_NAME = args.graph
LABELMAP_NAME = args.labels
min_conf_threshold = float(args.threshold)
use_TPU = args.edgetpu
IM_NAME = args.image
IM_DIR = args.imagedir

if (IM_NAME and IM_DIR):
    print('Error! Please only use the --image argument or the --imagedir argument, not both. Issue "python TFLite_detection_image.py -h" for help.')
    sys.exit()

if (not IM_NAME and not IM_DIR):
    IM_NAME = 'test1.jpg'

pkg = importlib.util.find_spec('tflite_runtime')
if pkg:
    from tflite_runtime.interpreter import Interpreter
    if use_TPU:
        from tflite_runtime.interpreter import load_delegate
else:
    from tensorflow.lite.python.interpreter import Interpreter
    if use_TPU:
        from tensorflow.lite.python.interpreter import load_delegate

if use_TPU:
    if (GRAPH_NAME == 'detect.tflite'):
        GRAPH_NAME = 'edgetpu.tflite'

CWD_PATH = os.getcwd()

if IM_DIR:
    PATH_TO_IMAGES = os.path.join(CWD_PATH, IM_DIR)
    images = glob.glob(PATH_TO_IMAGES + '/*')
elif IM_NAME:
    PATH_TO_IMAGES = os.path.join(CWD_PATH, IM_NAME)
    images = glob.glob(PATH_TO_IMAGES)

PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)
PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

with open(PATH_TO_LABELS, 'r') as f:
    labels = [line.strip() for line in f.readlines()]
if labels[0] == '???':
    del(labels[0])

if use_TPU:
    interpreter = Interpreter(model_path=PATH_TO_CKPT, experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
    print(PATH_TO_CKPT)
else:
    interpreter = Interpreter(model_path=PATH_TO_CKPT)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]

floating_model = (input_details[0]['dtype'] == np.float32)

input_mean = 127.5
input_std = 127.5

for image_path in images:
    image = cv2.imread(image_path)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    imH, imW, _ = image.shape
    image_resized = cv2.resize(image_rgb, (width, height))
    input_data = np.expand_dims(image_resized, axis=0)

    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std

    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()

    boxes = interpreter.get_tensor(output_details[0]['index'])[0]  # Bounding box coordinates of detected objects
    classes = interpreter.get_tensor(output_details[1]['index'])[0]  # Class index of detected objects
    scores = interpreter.get_tensor(output_details[2]['index'])[0]  # Confidence of detected objects

    for i in range(len(scores)):
        if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):
            ymin = int(max(1, (boxes[i][0] * imH)))
            xmin = int(max(1, (boxes[i][1] * imW)))
            ymax = int(min(imH, (boxes[i][2] * imH)))
            xmax = int(min(imW, (boxes[i][3] * imW)))
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)
            object_name = labels[int(classes[i])]  # Look up object name from "labels" array using class index
            label = '%s: %d%%' % (object_name, int(scores[i]*100))  # Example: 'person: 72%'
            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)  # Get font size
            label_ymin = max(ymin, labelSize[1] + 10)  # Make sure not to draw label too close to top of window
            cv2.rectangle(image, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (255, 255, 255), cv2.FILLED)  # Draw white box to put label text in
            cv2.putText(image, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)

    cv2.imshow('Object detector', image)
    if cv2.waitKey(0) == ord('q'):
        break

cv2.destroyAllWindows()
Now, my custom model seems to work: it locates the items in the image correctly, but it labels everything with the first entry in labelmap.txt. For example:
labelmap.txt:
key
remote
The model identifies the remotes in the images but labels them as "key" because that is the first entry in labelmap.txt. I don't know why this is happening; can someone please help me? I am sorry if anything is unclear. Please let me know and I will try my best to clarify. Thank you.
I followed the guide at https://github.com/EdjeElectronics/TensorFlow-Lite-Object-Detection-on-Android-and-Raspberry-Pi.
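
Not a confirmed fix, but one way to narrow this down is to check whether the model itself emits varying class indices or the labelmap lookup is misaligned. A sketch reusing the variables from the script above (assumed to run inside the detection loop, after interpreter.invoke()):

# If every raw index is 0, the exported model really predicts class 0 for
# everything (a training/export problem); if the indices vary, the labelmap
# lookup is off, e.g. by one row when a '???' background entry is missing.
classes = interpreter.get_tensor(output_details[1]['index'])[0]
scores = interpreter.get_tensor(output_details[2]['index'])[0]
for raw_idx, score in zip(classes.astype(int), scores):
    if score > min_conf_threshold:
        print('raw class index:', raw_idx, '-> label:', labels[raw_idx])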

Label map provided by landmarks module on Tensorflow Hub does not match model output

I am trying to use the landmark classification models on TensorFlow Hub, provided at https://tfhub.dev/google/on_device_vision/classifier/landmarks_classifier_asia_V1/1
The documentation says the output is "prediction:logits: A vector of 99543 similarity scores". However, my output shape is (1, 98960).
The label map file has 99543 lines, which agrees with the documentation, but the model's output size does not match the map file.
The prediction is also far from accurate: it labels an image of the Oriental Tower as The Israel Museum with a 0.76 score.
My input image is preprocessed to [321, 321, 3] and scaled to [0, 1], per the instructions.
What could be wrong? Is it a model/labelmap issue, or is there something wrong with my attempt?
import tensorflow.compat.v2 as tf
import tensorflow_hub as hub
import pandas as pd

IMAGE_HEIGHT = 321
IMAGE_WIDTH = 321

def load_img(path):
    img = tf.io.read_file(path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, [IMAGE_HEIGHT, IMAGE_WIDTH])
    img = tf.keras.preprocessing.image.img_to_array(img) / 255.0
    img = tf.expand_dims(img, 0)
    return img

module_handle = "https://tfhub.dev/google/on_device_vision/classifier/landmarks_classifier_asia_V1/1"
detector = hub.KerasLayer(module_handle, output_key='predictions:logits')

image_path = 'xxx'
img = load_img(image_path)
output_tensor = detector(img)
output_tensor.shape
Which gives the result:
(1, 98960)
Thanks in advance!
As of 2021-03-18, the inconsistencies in the labelmaps have been fixed and an update for the model documentation (i.e. that the length of the output vector should actually be 98960 instead of 99543) is on the way. You can already re-download the labelmap files, which are referenced on the model pages on tfhub.dev. Sorry for the inconvenience!
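
With the corrected labelmap, mapping the (1, 98960) output back to a name could look like the sketch below; labelmap_path and the id/name CSV columns are assumptions about the label map format, not details from this thread:

import numpy as np

labelmap_path = 'label_map.csv'  # hypothetical path to the re-downloaded labelmap
df = pd.read_csv(labelmap_path)
label_map = dict(zip(df['id'], df['name']))

logits = detector(img)              # shape (1, 98960) after the fix
probs = tf.nn.softmax(logits)       # turn the scores into probabilities
top = int(np.argmax(probs.numpy(), axis=1)[0])
print(label_map[top], float(probs.numpy()[0, top]))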

How to save a figure from vis_bbox without a white background when plotting with matplotlib?

I'm trying to save the image after a vis_bbox prediction at its original image dimensions.
My code:
from PIL import Image, ImageChops
import cv2
import matplotlib.pyplot as plt  # plt is used below but was not imported

img = utils.read_image('/home/ubuntu/ui.jpg', color=True)
bboxes, labels, scores = model.predict([img])
bbox, label, score = bboxes[0], labels[0], scores[0]
colors = voc_colormap(label + 1)
bccd_labels = ('cell', 'cell')
vis_bbox(img, bbox, label_names=bccd_labels, instance_colors=colors, alpha=0.9, linewidth=1.0)
plt.axis("off")
plt.savefig("/home/ubuntu/ins.jpg")
When saving, it writes the image with a white background and the default figure size (432×288).
I need to save the predicted image from vis_bbox at the original dimensions (1300×1300).
Any suggestions would be helpful!
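
Not from the thread, but a common matplotlib pattern for saving at exact pixel dimensions is to size the figure from the image and strip all margins. A sketch, assuming vis_bbox accepts a target axes via ax= (ChainerCV's does) and using the 1300×1300 size from the question:

import matplotlib.pyplot as plt

width_px, height_px = 1300, 1300  # original image size, from the question
dpi = 100
fig = plt.figure(figsize=(width_px / dpi, height_px / dpi), dpi=dpi)
ax = fig.add_axes([0, 0, 1, 1])   # axes fill the whole figure, no borders
ax.axis('off')

vis_bbox(img, bbox, label_names=bccd_labels, instance_colors=colors,
         alpha=0.9, linewidth=1.0, ax=ax)
fig.savefig('/home/ubuntu/ins.jpg', dpi=dpi)  # 13in x 13in at 100 dpi = 1300x1300 px
plt.close(fig)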

About tf.gfile.FastGFile and cv2 mat

This is the code for reading an image from disk and classifying it:
image_data = tf.gfile.FastGFile(imagePath, 'rb').read()
with tf.Session() as sess:
    softmax_tensor = sess.graph.get_tensor_by_name('final_result:0')
    predictions = sess.run(softmax_tensor,
                           {'DecodeJpeg/contents:0': image_data})
But now I want to capture frames from videos using Python and OpenCV, and then classify the captured Mat images with this model.
I tried:
image_data = tf.gfile.FastGFile(image_path, 'rb').read()
print(image_data)
imgRGB = cv2.imread(image_path)
r, buf = cv2.imencode(".jpg", imgRGB)
bytes_image = Image.fromarray(np.uint8(buf)).tobytes()
print(bytes_image)
and I got two similar results, but they are not identical:
b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00\x00\x00\x00\xff\xe1\x00"Exif\x00\x00MM\x00*\x00\x00\x00........
b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xdb\x00C\x00\x02.......
And when I called sess.run, I found that the final results are not the same either. How can I solve this?
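
There is no accepted answer here, but one plausible explanation: cv2.imencode re-encodes the frame as a fresh JPEG (different encoder settings and metadata), so the bytes will never match the original file exactly. For retrain.py-style graphs like the one this question appears to use, a known workaround is to feed the decoded pixel array to the 'DecodeJpeg:0' tensor instead of encoded bytes, skipping the JPEG round-trip entirely; a sketch under that assumption:

import cv2
import tensorflow as tf

cap = cv2.VideoCapture('video.mp4')  # hypothetical video source
with tf.Session() as sess:
    # assumes the retrained graph is already loaded into this session
    softmax_tensor = sess.graph.get_tensor_by_name('final_result:0')
    while True:
        ok, frame = cap.read()  # frame is a BGR uint8 array
        if not ok:
            break
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # feed decoded pixels directly, bypassing the JPEG decode node
        predictions = sess.run(softmax_tensor, {'DecodeJpeg:0': rgb})
cap.release()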

How to Urlretrieve and crop image based on data from CSV file?

I have a CSV file with URLs and box coordinates (x and y of the top-left corner, x and y of the bottom-right corner), and I would like to fetch each image, crop it to the box (then resize to 256×256) and save it. Unfortunately, downloading the whole database first and then building a separate one with cropped images is impractical because of the database's size, so the cropped image database has to be created from the start. Another way is to save each image, then crop it and overwrite the initial image (and then iterate with i += 1 to the next one).
Would the current approach work, or should I use a different method? Additionally, how would I save the acquired images to a specified folder? Currently they download to the same folder as the script.
import urllib.request
import csv
import numpy as np
import pandas as pd
from io import BytesIO
import requests
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

filename = "images"
# open file to read
with open("data_test.csv".format(filename), 'r') as csvfile:
    reader = csv.reader(csvfile)
    # pop header row (1st row in csv)
    header = next(reader)
    # iterate on all lines
    i = 0
    for line in csvfile:
        splitted_line = line.split(',')
        # check if we have an image URL
        if splitted_line[1] != '' and splitted_line[1] != "\n":
            response = requests.get(splitted_line[1])
            img = Image.open(BytesIO(response.content))
            #crop_img = img[splitted_line[2]:splitted_line[3], splitted_line[4]:splitted_line[5]]
            #crop_img = img[315:105, 370:173]
            img.save(str(i) + ".png")
            #crop_img = img[105:105+173,315:315+370]
            #[y: y + h, x: x + w]
            new_img = img.resize((256, 256))
            new_img.save(str(i) + ".png")
            imgplot = plt.imshow(img)
            plt.show()
            # urllib.request.urlopen(splitted_line[1])
            print("Image saved for {0}".format(splitted_line[0]))
            # img = cv2.imread(img_path, 0)
            i += 1
        else:
            print("No result for {0}".format(splitted_line[0]))
Any further recommendations are welcome.
Edit: The latest version gives me an error:
crop_img = img[105:105+173,315:315+370]
TypeError: 'JpegImageFile' object is not subscriptable
I solved the problem using BytesIO and some cropping/resizing techniques. (The TypeError above happens because a PIL JpegImageFile is not a NumPy array and does not support index slicing; PIL's img.crop() does the job instead.)
import csv
from io import BytesIO
import requests
from PIL import Image
import matplotlib.pyplot as plt

filename = "images"
# open file to read
with open("data_test.csv".format(filename), 'r') as csvfile:
    reader = csv.reader(csvfile)
    # pop header row (1st row in csv)
    header = next(reader)
    # iterate on all lines
    i = 0
    for line in csvfile:
        splitted_line = line.split(',')
        # check if we have an image URL
        if splitted_line[1] != '' and splitted_line[1] != "\n":
            response = requests.get(splitted_line[1])
            img = Image.open(BytesIO(response.content))
            # im.crop(box) ⇒ 4-tuple defining the left, upper, right, and lower pixel coordinate
            left_x = int(splitted_line[2])
            top_y = int(splitted_line[3])
            right_x = int(splitted_line[4])
            bottom_y = int(splitted_line[5])
            crop = img.crop((left_x, top_y, right_x, bottom_y))
            new_img = crop.resize((256, 256))
            """
            # preview new images
            imgplot = plt.imshow(new_img)
            plt.show()
            """
            new_img.save(str(i) + ".png")
            print("Image saved for {0}".format(splitted_line[0]))
            i += 1
        else:
            print("No result for {0}".format(splitted_line[0]))
Hope it helps someone. Any optimization recommendations are still welcome.
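
On the still-open sub-question of saving into a specific folder: a small sketch, where output_dir is a hypothetical destination rather than something from the thread:

import os
from PIL import Image

output_dir = "cropped"                  # hypothetical destination folder
os.makedirs(output_dir, exist_ok=True)  # create it once if it does not exist

new_img = Image.new("RGB", (256, 256))  # stands in for the cropped image above
# save into output_dir instead of the script's working directory
new_img.save(os.path.join(output_dir, "{0}.png".format(0)))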