How to use cv2 image data as batch_x input in tensorflow - numpy

I want to use images of my screen as TensorFlow input data.
To record the images I am using the script below.
The image data is captured as uint8, but TensorFlow needs float32.
So how do I convert it so that I can pass the image to feed_dict
loss, acc = sess.run([loss_op, accuracy], feed_dict={X: batch_x, Y: batch_y})
as batch_x?
import numpy as np
import tensorflow as tf
import win32gui, win32ui, win32con, win32api
import cv2

def grab_frame(size=None):
    hwin = win32gui.GetDesktopWindow()

    if size:
        left, top, x, y = size
        width = x - left + 1
        height = y - top + 1
    else:
        width = win32api.GetSystemMetrics(win32con.SM_CXVIRTUALSCREEN)
        height = win32api.GetSystemMetrics(win32con.SM_CYVIRTUALSCREEN)
        left = win32api.GetSystemMetrics(win32con.SM_XVIRTUALSCREEN)
        top = win32api.GetSystemMetrics(win32con.SM_YVIRTUALSCREEN)

    # Copy the screen region into a GDI bitmap
    hwindc = win32gui.GetWindowDC(hwin)
    srcdc = win32ui.CreateDCFromHandle(hwindc)
    memdc = srcdc.CreateCompatibleDC()
    bmp = win32ui.CreateBitmap()
    bmp.CreateCompatibleBitmap(srcdc, width, height)
    memdc.SelectObject(bmp)
    memdc.BitBlt((0, 0), (width, height), srcdc, (left, top), win32con.SRCCOPY)

    # Wrap the raw bitmap bytes as a BGRA uint8 array
    signedIntsArray = bmp.GetBitmapBits(True)
    img = np.fromstring(signedIntsArray, dtype='uint8')
    img.shape = (height, width, 4)

    # Release GDI resources
    srcdc.DeleteDC()
    memdc.DeleteDC()
    win32gui.ReleaseDC(hwin, hwindc)
    win32gui.DeleteObject(bmp.GetHandle())

    img_res = cv2.resize(img, (480, 270))
    return img_res
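
A minimal sketch of one way to feed these frames (an assumption, since the shape of the X placeholder is not shown in the question): drop the alpha channel, cast the uint8 frame to float32, optionally scale it to [0, 1], and add a batch dimension before passing it as batch_x.
frame = grab_frame()                         # uint8 BGRA, shape (270, 480, 4) after the resize
frame = frame[:, :, :3]                      # drop the alpha channel if the model expects 3 channels
batch_x = frame.astype(np.float32) / 255.0   # cast to float32 and scale to [0, 1]
batch_x = np.expand_dims(batch_x, axis=0)    # add a batch dimension -> shape (1, 270, 480, 3)
# loss, acc = sess.run([loss_op, accuracy], feed_dict={X: batch_x, Y: batch_y})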

Related

TypeError: slice indices must be integers or None or have an __index__ method (Albumentations/NumPy)

Hi everyone, can you please help me? I'm getting this error with random crop augmentation:
TypeError: slice indices must be integers or None or have an __index__ method
The code is below.
!conda install -c conda-forge gdcm -y

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import cv2 as cv
import albumentations as A
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
from tqdm.auto import tqdm

def read_img(path, voi_lut=True, fix_monochrome=True):
    dcm = pydicom.read_file(path)
    if voi_lut:
        img = apply_voi_lut(dcm.pixel_array, dcm)
    else:
        img = dcm.pixel_array
    if fix_monochrome and dcm.PhotometricInterpretation == "MONOCHROME1":
        img = np.amax(img) - img
    img = img - np.min(img)
    img = img / np.max(img)
    img = (img * 255).astype(np.uint8)
    return img

def resize_img(img, size, pad=True, resample=Image.LANCZOS):
    img = np.array(img)
    if pad:
        max_width = 4891
        max_height = 4891
        img = np.pad(img, ((0, max_height - img.shape[0]), (0, max_width - img.shape[1]), (0, 0)))
    img = img.resize((size, size), resample)
    return img

def augment_img(img, clahe=True, albumentations=True):
    if clahe:
        clahe = cv.createCLAHE(clipLimit=15.0, tileGridSize=(8,8))
        img = clahe.apply(img)
    else:
        img = cv.equalizeHist(img)
    if albumentations:
        img = np.stack((img, ) * 3, axis=-1)
        transform = A.Compose([
            A.RandomSunFlare(p=0.2),
            A.RandomFog(p=0.2),
            A.RandomBrightness(p=0.2),
            A.RandomCrop(p=1.0, width=img.shape[0] / 2, height=img.shape[1] / 2),
            A.Rotate(p=0.2, limit=90),
            A.RGBShift(p=0.2),
            A.RandomSnow(p=0.2),
            A.HorizontalFlip(p=0.2),
            A.VerticalFlip(p=0.2),
            A.RandomContrast(p=0.2, limit=0.2),
            A.HueSaturationValue(p=0.2, hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=50)
        ])
        img = transform(image=img)["image"]
    return img

img = read_img('../input/siim-covid19-detection/test/00188a671292/3eb5a506ccf3/3dcdfc352a06.dcm') #You can replace this with any .dcm filepath on your system
img = augment_img(img)
img = resize_img(img, 1024)
plt.imshow(img, cmap='gray')
This is for the SIIM Kaggle competition. I don't know how to solve this, and the issue occurs only with random crop. I tried searching online but was unable to find a solution.
I think the error is in this line:
A.RandomCrop(p=1.0, width=img.shape[0] / 2, height=img.shape[1] / 2)
The problem here is that your width and height may not be integers, but they must be.
Check Albumentations RandomCrop documentation.
There are two possible fixes.
Explicitly convert the width and height arguments to integers:
A.RandomCrop(p=1.0, width=int(img.shape[0] / 2), height=int(img.shape[1] / 2))
Use integer division:
A.RandomCrop(p=1.0, width=img.shape[0] // 2, height=img.shape[1] // 2)
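As a quick self-contained check (on a dummy array, not the competition data), the integer crop size is accepted by Albumentations:
import numpy as np
import albumentations as A

img = np.zeros((100, 100, 3), dtype=np.uint8)  # dummy image just to exercise the transform
crop = A.Compose([A.RandomCrop(p=1.0, width=img.shape[0] // 2, height=img.shape[1] // 2)])
out = crop(image=img)["image"]
print(out.shape)  # (50, 50, 3)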
Let me know if it helps!

Tensorflow lite only using the first item in the labelmap.txt file when identifying items

I have installed TensorFlow 1.15 and created a custom model. I converted it into a .tflite file so TensorFlow Lite can read it. Then I ran the following code:
import os
import argparse
import cv2
import numpy as np
import sys
import glob
import importlib.util

parser = argparse.ArgumentParser()
parser.add_argument('--modeldir', help='Folder the .tflite file is located in', required=True)
parser.add_argument('--graph', help='Name of the .tflite file, if different than detect.tflite', default='detect.tflite')
parser.add_argument('--labels', help='Name of the labelmap file, if different than labelmap.txt', default='labelmap.txt')
parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects', default=0.5)
parser.add_argument('--image', help='Name of the single image to perform detection on. To run detection on multiple images, use --imagedir', default=None)
parser.add_argument('--imagedir', help='Name of the folder containing images to perform detection on. Folder must contain only images.', default=None)
parser.add_argument('--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection', action='store_true')
args = parser.parse_args()

MODEL_NAME = args.modeldir
GRAPH_NAME = args.graph
LABELMAP_NAME = args.labels
min_conf_threshold = float(args.threshold)
use_TPU = args.edgetpu
IM_NAME = args.image
IM_DIR = args.imagedir

if (IM_NAME and IM_DIR):
    print('Error! Please only use the --image argument or the --imagedir argument, not both. Issue "python TFLite_detection_image.py -h" for help.')
    sys.exit()

if (not IM_NAME and not IM_DIR):
    IM_NAME = 'test1.jpg'

pkg = importlib.util.find_spec('tflite_runtime')
if pkg:
    from tflite_runtime.interpreter import Interpreter
    if use_TPU:
        from tflite_runtime.interpreter import load_delegate
else:
    from tensorflow.lite.python.interpreter import Interpreter
    if use_TPU:
        from tensorflow.lite.python.interpreter import load_delegate

if use_TPU:
    if (GRAPH_NAME == 'detect.tflite'):
        GRAPH_NAME = 'edgetpu.tflite'

CWD_PATH = os.getcwd()

if IM_DIR:
    PATH_TO_IMAGES = os.path.join(CWD_PATH, IM_DIR)
    images = glob.glob(PATH_TO_IMAGES + '/*')
elif IM_NAME:
    PATH_TO_IMAGES = os.path.join(CWD_PATH, IM_NAME)
    images = glob.glob(PATH_TO_IMAGES)

PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)
PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

with open(PATH_TO_LABELS, 'r') as f:
    labels = [line.strip() for line in f.readlines()]

if labels[0] == '???':
    del(labels[0])

if use_TPU:
    interpreter = Interpreter(model_path=PATH_TO_CKPT, experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
    print(PATH_TO_CKPT)
else:
    interpreter = Interpreter(model_path=PATH_TO_CKPT)

interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]

floating_model = (input_details[0]['dtype'] == np.float32)

input_mean = 127.5
input_std = 127.5

for image_path in images:
    image = cv2.imread(image_path)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    imH, imW, _ = image.shape
    image_resized = cv2.resize(image_rgb, (width, height))
    input_data = np.expand_dims(image_resized, axis=0)

    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std

    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()

    boxes = interpreter.get_tensor(output_details[0]['index'])[0] # Bounding box coordinates of detected objects
    classes = interpreter.get_tensor(output_details[1]['index'])[0] # Class index of detected objects
    scores = interpreter.get_tensor(output_details[2]['index'])[0] # Confidence of detected objects

    for i in range(len(scores)):
        if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):
            ymin = int(max(1, (boxes[i][0] * imH)))
            xmin = int(max(1, (boxes[i][1] * imW)))
            ymax = int(min(imH, (boxes[i][2] * imH)))
            xmax = int(min(imW, (boxes[i][3] * imW)))

            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)

            object_name = labels[int(classes[i])] # Look up object name from "labels" array using class index
            label = '%s: %d%%' % (object_name, int(scores[i]*100)) # Example: 'person: 72%'
            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size
            label_ymin = max(ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window
            cv2.rectangle(image, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in
            cv2.putText(image, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)

    cv2.imshow('Object detector', image)

    if cv2.waitKey(0) == ord('q'):
        break

cv2.destroyAllWindows()
Now, my custom model seems to work. It locates the items in the image correctly, but it labels everything with the first item in labelmap.txt. For example:
labelmap.txt:
key
remote
The model identifies the remotes in the images but labels them as "key" because that is the first entry in labelmap.txt. I don't know why this is happening; can someone please help me? I am sorry if anything is unclear. Please let me know and I will try my best to clarify. Thank you.
I followed the guide at https://github.com/EdjeElectronics/TensorFlow-Lite-Object-Detection-on-Android-and-Raspberry-Pi.
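
One hedged diagnostic sketch (an assumption, not a confirmed fix): the script reads the class tensor from a fixed position in output_details, so it is worth checking that index 1 really holds the class indices for this converted model; if it holds something else, int(classes[i]) can come out as 0 for every detection, which always maps to the first line of labelmap.txt.
# Hypothetical sanity check: inspect the converted model's output tensors.
for i, detail in enumerate(output_details):
    print(i, detail['name'], detail['shape'])
classes = interpreter.get_tensor(output_details[1]['index'])[0]
print(classes[:10])  # all zeros here would explain why every box is labeled with labels[0]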

Using photoshop to complete undersampling in tensorflow object detection?

I'm currently training an object detection model using TensorFlow and I ran into a problem: I don't have enough samples to train my model effectively, and it will take me a long time to get more. Would it be a good idea to create the remaining samples using Photoshop, or will I run into issues with this approach?
You have many options:
imgaug
albumentations
Augmentor
OpenCV:
Image-Augmentation-Using-OpenCV-and-Python-Github-Repo
Example code I have used before:
import numpy as np
import cv2 as cv
import imutils

def data_augmentation(img, min_rot_angle=-180, max_rot_angle=180, crop_ratio=0.2, smooth_size=3, sharp_val=3, max_noise_scale=10):
    (H, W) = img.shape[:2]
    img_a = img

    all_func = ['flip', 'rotate', 'crop', 'smooth', 'sharp', 'noise']
    do_func = np.random.choice(all_func, size=np.random.randint(1, len(all_func)), replace=False)
    #do_func = ['crop']

    # Flip image, 0: vertically, 1: horizontally
    if 'flip' in do_func:
        img_a = cv.flip(img_a, np.random.choice([0, 1]))

    # Rotate image
    if 'rotate' in do_func:
        rot_ang = np.random.uniform(min_rot_angle, max_rot_angle)
        img_a = imutils.rotate_bound(img_a, rot_ang)

    # Crop image
    if 'crop' in do_func:
        (H_A, W_A) = img_a.shape[:2]
        start_x = np.random.randint(0, int(H_A * crop_ratio))
        start_y = np.random.randint(0, int(W_A * crop_ratio))
        end_x = np.random.randint(int(H_A * (1 - crop_ratio)), H_A)
        end_y = np.random.randint(int(W_A * (1 - crop_ratio)), W_A)
        img_a = img_a[start_x:end_x, start_y:end_y]

    # Smoothing
    if 'smooth' in do_func:
        img_a = cv.GaussianBlur(img_a, (smooth_size, smooth_size), 0)

    # Sharpening
    if 'sharp' in do_func:
        de_sharp_val = -(sharp_val - 1) / 8
        kernel = np.array([[de_sharp_val] * 3, [de_sharp_val, sharp_val, de_sharp_val], [de_sharp_val] * 3])
        img_a = cv.filter2D(img_a, -1, kernel)

    # Add Gaussian noise to the image (cast to float32 so cv.add accepts both operands)
    if 'noise' in do_func:
        noise_scale = np.random.uniform(0, max_noise_scale)
        gauss = np.random.normal(0, noise_scale, img_a.size)
        gauss = np.float32(gauss.reshape(img_a.shape[0], img_a.shape[1], img_a.shape[2]))
        img_a = cv.add(np.float32(img_a), gauss)

    # Keep shape
    img_a = cv.resize(img_a, (W, H))
    return np.float32(img_a)
Others:
You can also do data augmentation with just TensorFlow! More in this blog: Data Augmentation in Python: Everything You Need to Know
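For example, a minimal sketch of augmenting directly with tf.image (assuming TensorFlow 2.x and a float32 image tensor in [0, 1]; the parameter values are just illustrative):
import tensorflow as tf

def tf_augment(image):
    # image: float32 tensor in [0, 1], shape (H, W, 3)
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_brightness(image, max_delta=0.2)
    image = tf.image.random_contrast(image, lower=0.8, upper=1.2)
    return tf.clip_by_value(image, 0.0, 1.0)

# Typical use inside a tf.data input pipeline:
# dataset = dataset.map(lambda img, label: (tf_augment(img), label))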

How to perform 3D volumetric plotting using 3D arrays on plotly?

I would like to perform a 3D volumetric plot using 3D numpy arrays on plotly (something similar to using the isosurface function in MATLAB). The arrays contain 10 slices of images of size 512 by 512 - shape = (10, 512, 512). I followed one of the examples on the plotly site (https://plot.ly/python/3d-volume-plots/) but it returned an empty plot instead. Why is this the case?
My code is as shown below:
import cv2
import skimage.io as skio
import glob
import os
import numpy as np
import pyvista as pv
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d

def plot3D(img_dir):
    #Read images into array
    img_list = []
    index = 0
    for img in os.listdir(img_dir):
        img_individual = cv2.imread(os.path.join(img_dir, img), cv2.IMREAD_GRAYSCALE)
        img_list.append([img_individual])
        index += 1 #Count the number of images appended into the list
    print(np.shape(img_list)) #shape = (10,1,512,512)

    img_listtoarray = np.asarray(img_list) #Convert list to numpy array
    img_array = np.ones((index, 512, 512))
    print(np.shape(img_array))

    i = 0
    j = 0
    k = 0
    #Reduce 4D array into 3D array of size (10,512,512)
    for i in range(index):
        while(j < 512):
            while(k < 512):
                img_array[i, j, k] = img_listtoarray[i, 0, j, k]
                k += 1
            j += 1
            k = 0
        j = 0
    print(np.shape(img_array)) #shape = (10,512,512)

    #Create meshgrid
    Z, X, Y = np.mgrid[1:10:5j, 1:512:5j, 1:512:5j] #Check dimensions

    fig = go.Figure(data = go.Volume(
        x = Z.flatten(),
        y = X.flatten(),
        z = Y.flatten(),
        value = img_array,
        isomin = 0.1,
        isomax = 0.8,
        opacity = 0.3,
        surface_count = 30
    ))
    fig.show()

plot3D("train/result_processed/")
This will be used for the 3D image reconstruction of an MDCK cell spheroid from the segmented image slices, as shown in the link.
All of the images to be used are of uint8 type.
Thank you.
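
One hedged observation (an assumption based on the go.Volume API, not a verified fix for this dataset): go.Volume expects x, y, z and value to be flattened arrays of the same length, whereas the code above passes a 5x5x5 meshgrid (125 points) together with a (10, 512, 512) value array. A minimal sketch that keeps the grid and the values consistent, downsampling the slices so the figure stays manageable:
import numpy as np
import plotly.graph_objects as go

# img_array: shape (10, 512, 512), uint8
vol = img_array[:, ::8, ::8].astype(np.float32) / 255.0     # shape (10, 64, 64), values in [0, 1]
Z, X, Y = np.mgrid[0:vol.shape[0], 0:vol.shape[1], 0:vol.shape[2]]

fig = go.Figure(data=go.Volume(
    x=X.flatten(),
    y=Y.flatten(),
    z=Z.flatten(),
    value=vol.flatten(),   # same length as x, y and z
    isomin=0.1,
    isomax=0.8,
    opacity=0.3,
    surface_count=30
))
fig.show()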

How can I draw a rectangle from the points I want, using ROI?

Hello, I am a beginner in OpenCV.
I have a maze image and I wrote maze solver code. I need to get a photo like the target picture for this code to work.
I want to select the contours of the white area using an ROI, but I could not.
When I try the ROI method, I get a smooth rectangle with a black area selected.
https://i.stack.imgur.com/Ty5BX.png -----> this is my current result
https://i.stack.imgur.com/S7zuJ.png -----> this is the result I want
import cv2
import numpy as np

#import image
image = cv2.imread('rt4.png')

#grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
#cv2.imshow('gray', gray)
#cv2.waitKey(0)

#binary
#ret,thresh = cv2.threshold(gray,127,255,cv2.THRESH_BINARY_INV)
threshold = 150
thresh = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)[1]
cv2.namedWindow('second', cv2.WINDOW_NORMAL)
cv2.imshow('second', thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()

#dilation
kernel = np.ones((1, 1), np.uint8)
img_dilation = cv2.dilate(thresh, kernel, iterations=1)
cv2.namedWindow('dilated', cv2.WINDOW_NORMAL)
cv2.imshow('dilated', img_dilation)
cv2.waitKey(0)
cv2.destroyAllWindows()

#find contours
im2, ctrs, hier = cv2.findContours(img_dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

#sort contours
sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])

list = []
for i, ctr in enumerate(sorted_ctrs):
    # Get bounding box
    x, y, w, h = cv2.boundingRect(ctr)

    # Getting ROI
    roi = image[y:y+h, x:x+w]
    a = w - x
    b = h - y
    list.append((a, b, x, y, w, h))

    # show ROI
    #cv2.imshow('segment no:'+str(i),roi)
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
    #cv2.waitKey(0)

    if w > 15 and h > 15:
        cv2.imwrite('home/Desktop/output/{}.png'.format(i), roi)

cv2.namedWindow('marked areas', cv2.WINDOW_NORMAL)
cv2.imshow('marked areas', image)
cv2.waitKey(0)
cv2.destroyAllWindows()

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = np.float32(gray)
dst = cv2.cornerHarris(gray, 2, 3, 0.04)

#result is dilated for marking the corners, not important
dst = cv2.dilate(dst, None)
image[dst > 0.01*dst.max()] = [0, 0, 255]
cv2.imshow('dst', image)
if cv2.waitKey(0) & 0xff == 27:
    cv2.destroyAllWindows()

list.sort()
print(list[len(list)-1])
I misunderstood your question earlier. So, I'm rewriting.
As @Silencer has already stated, you could use the drawContours method. You can do it as follows:
import cv2
import numpy as np

#import image
im = cv2.imread('Maze2.png')
gaus = cv2.GaussianBlur(im, (5, 5), 1)
# mask1 = cv2.dilate(gaus, np.ones((15, 15), np.uint8), iterations=3)
mask2 = cv2.erode(gaus, np.ones((5, 5), np.uint8), iterations=1)
imgray = cv2.cvtColor(mask2, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 127, 255, 0)
im2, contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

maxArea1 = 0
maxI1 = 0
for i in range(len(contours)):
    area = cv2.contourArea(contours[i])
    epsilon = 0.01 * cv2.arcLength(contours[i], True)
    approx = cv2.approxPolyDP(contours[i], epsilon, True)
    if area > maxArea1:
        maxArea1 = area
        maxI1 = i  # remember the index of the largest contour

print(maxArea1)
print(maxI1)
cv2.drawContours(im, contours, maxI1, (0, 255, 255), 3)

cv2.imshow("yay", im)
cv2.imshow("gray", imgray)
cv2.waitKey(0)
cv2.destroyAllWindows()
I used it on the following image:
And I got the right answer. You can add additional filters, or you could reduce the area using an ROI, to decrease the discrepancy, but it wasn't required.
Hope it helps!
A simple solution to just draw a slanted rectangle would be to use cv2.polylines. Based on your result, I'm assuming you already have the coordinates of the vertices of the area; let's call them [x1,y1], [x2,y2], [x3,y3], [x4,y4]. The polylines function draws a line from vertex to vertex to create a closed polygon.
import cv2
import numpy as np
#List coordinates of vertices as an array
pts = np.array([[x1,y1],[x2,y2],[x3,y3],[x4,y4]], np.int32)
pts = pts.reshape((-1,1,2))
#Draw lines from vertex to vertex
cv2.polylines(image, [pts], True, (255,0,0))
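A self-contained demo of the same call, with made-up vertex coordinates purely for illustration:
import cv2
import numpy as np

image = np.zeros((200, 200, 3), dtype=np.uint8)   # blank canvas
pts = np.array([[30, 50], [160, 40], [170, 150], [40, 160]], np.int32).reshape((-1, 1, 2))
cv2.polylines(image, [pts], True, (255, 0, 0), 2) # closed blue polygon
cv2.imwrite('polygon_demo.png', image)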