I'm trying to convert parts of some Keras DarkNet code to make it run faster.
Here is the code I'm trying to optimize:
import cv2
import numpy as np
from PIL import Image

model_image_size = (416, 416)

frame = cv2.imread("test.png", cv2.IMREAD_COLOR)
im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
im = Image.fromarray(im).crop((1625, 785, 1920, 1080))  # crop ROI
resized_image = im.resize(tuple(reversed(model_image_size)), Image.BICUBIC)
image_data = np.array(resized_image, dtype='float32')
image_data /= 255.
image_data = np.expand_dims(image_data, 0)  # Add batch dimension.
return image_data
This is my attempt to achieve the same output without the intermediate PIL conversion, to reduce time:
import cv2
import numpy as np

model_image_size = (416, 416)

frame = cv2.imread("test.png", cv2.IMREAD_COLOR)
frame = frame[785:1080, 1625:1920]  # crop ROI
im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
resized_image = cv2.resize(im, model_image_size, interpolation=cv2.INTER_CUBIC)
resized_image /= 255.
image_data = np.expand_dims(resized_image, 0)  # Add batch dimension.
return image_data
However, running this code raises:
resized_image /= 255.
TypeError: ufunc 'true_divide' output (typecode 'd') could not be coerced to provided output parameter (typecode 'B') according to the casting rule ''same_kind''
It seems I need to change the uint8 type to float32 before normalizing, but I'm not sure how to achieve that with OpenCV.
You can use resized_image.astype(np.float32) to convert resized_image from uint8 to float32, then proceed with the normalization and the rest:
import cv2
import numpy as np

frame = cv2.imread("yourfile.png")
frame = frame[200:500, 400:1000]  # crop ROI
im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
model_image_size = (416, 416)
resized_image = cv2.resize(im, model_image_size, interpolation=cv2.INTER_CUBIC)
resized_image = resized_image.astype(np.float32)  # uint8 -> float32
resized_image /= 255.
image_data = np.expand_dims(resized_image, 0)  # Add batch dimension.
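One porting detail worth noting (my addition, not part of the original answer): both PIL's Image.resize and cv2.resize take the target size as (width, height). The PIL version reverses model_image_size before resizing, so if the size were ever non-square, the OpenCV version would need the same reversal:

resized_image = cv2.resize(im, tuple(reversed(model_image_size)), interpolation=cv2.INTER_CUBIC)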
Your issue is that you are dividing and assigning to the same array with /=. NumPy requires that an in-place operation keep the array's original dtype, but dividing by a floating-point number produces floating-point values, which cannot be stored back into a uint8 array.
To solve this issue you can do:
resized_image = resized_image / 255.
and it should work. Note, however, that this produces an array of dtype=float64. To convert it to float32 you can do:
resized_image.astype(np.float32)
or
np.float32(resized_image)
The np should come from:
import numpy as np
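Here is a minimal demonstration (my own illustration) of the dtype behaviour described above:

import numpy as np

a = np.zeros((2, 2), dtype=np.uint8)
b = a / 255.              # out-of-place division: new array, dtype float64
c = b.astype(np.float32)  # explicit cast down to float32
print(b.dtype, c.dtype)   # float64 float32
# a /= 255.  # raises the same TypeError: in-place division cannot change dtype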
Related
I am looking at this code and I can't seem to figure out why the image rescaling is not wrong.
import tensorflow as tf
import matplotlib.pyplot as plt
AUTOTUNE = tf.data.experimental.AUTOTUNE
IMAGE_SIZE = [256, 256]
files = ["<add_an_image_location>"]
def decode_image(image):
    image = tf.io.read_file(image)
    image = tf.io.decode_png(image, channels=3)
    image = tf.image.resize(image, [*IMAGE_SIZE])
    image = (tf.cast(image, tf.float32) / 127.5) - 1  # this I understand, the image is being converted to be in range [-1, 1]
    image = tf.reshape(image, [*IMAGE_SIZE, 3])
    return image

def load_dataset(filenames, labeled=True, ordered=False):
    dataset = tf.data.Dataset.from_tensor_slices(filenames)
    dataset = dataset.map(decode_image, num_parallel_calls=AUTOTUNE)
    return dataset

ds = load_dataset(files, labeled=True).batch(1)
example = next(iter(ds))
plt.imshow(example[0] * 0.5 + 0.5)  # this I can't seem to figure out!
I realise and understand why this would work: plt.imshow(((example[0] + 1) * 127.5).numpy().astype('uint8'))
What I can't seem to understand is why this works: plt.imshow(example[0] * 0.5 + 0.5)
Similarly:
(tf.cast(image, tf.float32) / 255) - 0.5  # scaling like this
plt.imshow(example[0] + 0.5)  # makes rescaling like this work
I have a feeling that I am missing something silly, but please, someone help me! :D
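For what it's worth, the arithmetic can be checked directly (a worked example of my own, not from the post): decode_image maps pixels to [-1, 1], and matplotlib's imshow accepts float images in [0, 1], so x * 0.5 + 0.5 is exactly the inverse affine map:

x = -1.0  ->  -1.0 * 0.5 + 0.5 = 0.0
x =  1.0  ->   1.0 * 0.5 + 0.5 = 1.0

Likewise, x / 255 - 0.5 lands in [-0.5, 0.5], and adding 0.5 maps it back to [0, 1]. (x + 1) * 127.5 instead targets the uint8 range [0, 255], which is why that variant needs the cast to 'uint8'.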
In ImageDataGenerator, I've used the following function to preprocess images, via the 'preprocessing' keyword of .flow_from_dataframe().
However, I am now trying to use image_dataset_from_directory, which does not work with the preprocess function, as it does not allow embedding one.
I've tried to apply the preprocess_image() function after the dataset is generated by image_dataset_from_directory, through the .map() function, but that does not work either (a sketch of one way to wire this up follows the code below).
Could anyone please advise?
Many thanks,
Tony
train_Gen = dataGen.flow_from_dataframe(
    df,
    x_col='id_code',
    y_col='diagnosis',
    directory=os.path.join(data_dir, 'train_images'),
    batch_size=BATCH_SIZE,
    target_size=(IMG_WIDTH, IMG_HEIGHT),
    subset='training',
    seed=123,
    class_mode='categorical',
    preprocessing=preprocess_image,
)
def crop_image_from_gray(img, tol=7):
    """
    Applies masks to the original image and
    returns a preprocessed image with
    3 channels
    :param img: A NumPy Array that will be cropped
    :param tol: The tolerance used for masking
    :return: A NumPy array containing the cropped image
    """
    # If for some reason we only have two channels
    if img.ndim == 2:
        mask = img > tol
        return img[np.ix_(mask.any(1), mask.any(0))]
    # If we have a normal RGB image
    elif img.ndim == 3:
        gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        mask = gray_img > tol
        check_shape = img[:, :, 0][np.ix_(mask.any(1), mask.any(0))].shape[0]
        if check_shape == 0:  # image is so dark that we would crop out everything,
            return img        # so return the original image
        else:
            img1 = img[:, :, 0][np.ix_(mask.any(1), mask.any(0))]
            img2 = img[:, :, 1][np.ix_(mask.any(1), mask.any(0))]
            img3 = img[:, :, 2][np.ix_(mask.any(1), mask.any(0))]
            img = np.stack([img1, img2, img3], axis=-1)
    return img
def preprocess_image(image, sigmaX=10):
    """
    The whole preprocessing pipeline:
    1. Read in image
    2. Apply masks
    3. Resize image to desired size
    4. Apply a Gaussian-blur-based sharpening to increase robustness
    :param image: A NumPy Array that will be cropped
    :param sigmaX: Value used for the GaussianBlur applied to the image
    :return: A NumPy array containing the preprocessed image
    """
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = crop_image_from_gray(image)
    image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT))
    image = cv2.addWeighted(image, 4, cv2.GaussianBlur(image, (0, 0), sigmaX), -4, 128)
    return image
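Since the question mentions .map(), here is a minimal sketch (my assumption about the intended wiring, not code from the post) of running a NumPy/OpenCV function like preprocess_image inside a tf.data pipeline by wrapping it in tf.numpy_function:

import tensorflow as tf

def tf_preprocess(image, label):
    # image_dataset_from_directory yields float32 pixels in [0, 255];
    # preprocess_image expects a single HxWx3 uint8 array, so cast first.
    # (Note: preprocess_image assumes BGR input, while TF decodes RGB,
    # so the colour conversion inside it may need adjusting.)
    image = tf.cast(image, tf.uint8)
    image = tf.numpy_function(preprocess_image, [image], tf.uint8)
    image.set_shape([IMG_HEIGHT, IMG_WIDTH, 3])  # numpy_function drops shape info
    return image, label

# batch_size=None yields unbatched samples, so the wrapper sees one image at a time
train_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir, image_size=(IMG_HEIGHT, IMG_WIDTH), batch_size=None)
train_ds = train_ds.map(tf_preprocess, num_parallel_calls=tf.data.AUTOTUNE).batch(BATCH_SIZE)

The trade-off is that tf.numpy_function runs eagerly outside the graph, so it cannot be serialized into a SavedModel and may become a throughput bottleneck.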
I was using this PyTorch model for Real-ESRGAN and it was giving good results. I then converted the model to a TensorFlow one. Now when I use it, the image does get super-resolved (I concluded that from the size of the output image), but its color channels are in a very weird condition.
I am using OpenCV to take the input and the model to process it. I suspect the issue is OpenCV's BGR versus TensorFlow's RGB, but using cv2.COLOR_BGR2RGB is not helping.
Any idea how to solve this?
This is my code:
import tensorflow as tf
import os.path as osp
import glob
import cv2
import numpy as np
import torch

test_img_folder = './images/*'

model = tf.saved_model.load("./RealESRGAN_1/")
idx = 0
for path in glob.glob(test_img_folder):
    idx += 1
    base = osp.splitext(osp.basename(path))[0]
    print(idx, base)

    # read image (OpenCV loads BGR)
    img = cv2.imread(path, cv2.IMREAD_COLOR)
    print(img.shape)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    cv2.imshow('Input', img)
    cv2.waitKey(0)

    # HWC -> CHW, add batch dimension, cast to float32
    img = np.transpose(img, (2, 0, 1))
    img = np.expand_dims(img, axis=0)
    img = tf.dtypes.cast(img, tf.float32)

    with torch.no_grad():  # note: no_grad() has no effect on a TensorFlow model
        output = model(x=img)
        output = output['sum'].numpy()
        output = output[0, :, :, :]
        output = np.transpose(output, (1, 2, 0))

    cv2.imwrite('./results/{:s}_rlt.png'.format(base), output)
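If the weird colours are indeed a channel-order problem, one thing worth trying (a guess on my part, not a confirmed fix) is converting the model's output back from RGB to BGR, and clipping it to a valid range, before handing it to cv2.imwrite:

# assumes the network outputs RGB float values in roughly [0, 255];
# if it emits [0, 1] instead, multiply by 255 first
output = np.clip(output, 0, 255).astype(np.uint8)
output = cv2.cvtColor(output, cv2.COLOR_RGB2BGR)
cv2.imwrite('./results/{:s}_rlt.png'.format(base), output)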
I am fairly new to TensorFlow and I have a tflite model which needs to run inference on a single image (i.e., no datasets). The docs say the input should be 224x224x3 and scaled to [0, 1] (https://www.tensorflow.org/lite/tutorials/model_maker_image_classification#advanced_usage), but I am having trouble doing this rescaling to [0, 1].
Currently I have something like so:
img = tf.io.read_file(image_path)
img = tf.io.decode_image(img, channels=3)
img = tf.image.convert_image_dtype(img, tf.uint8)
print('min max img value',tf.reduce_min(img),tf.reduce_max(img))
The min and max are 0 and 255, respectively. I would like to scale this to [0, 1].
I am on TF 2.5 and I do not see a built-in method to do this.
I tried doing this:
img = tf.io.read_file(image_path)
img = tf.io.decode_image(img, channels=3)
scale = 1. / 255
img = img * scale
img = tf.image.convert_image_dtype(img, tf.uint8)
print('min max img value', tf.reduce_min(img), tf.reduce_max(img))
and I get thrown:
TypeError: Cannot convert 0.00392156862745098 to EagerTensor of dtype uint8
I think there is some casting error :(
In order to avoid the
TypeError: Cannot convert 0.00392156862745098 to EagerTensor of dtype uint8
error, we have to cast img from tf.uint8 to tf.float32 before dividing, and divide by 255 to land exactly in [0, 1]:
img = tf.cast(img, dtype=tf.float32) / tf.constant(255., dtype=tf.float32)
print('min max img value', tf.reduce_min(img), tf.reduce_max(img))
Converting an image tensor that is already tf.float32 and normalized to [0, 1] back to tf.uint8, as the last line of your snippet does, is probably not a good idea.
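As an aside (my addition), tf.image.convert_image_dtype does the cast and the scaling to [0, 1] in a single step when converting from uint8 to a float dtype:

img = tf.io.read_file(image_path)
img = tf.io.decode_image(img, channels=3)
img = tf.image.convert_image_dtype(img, tf.float32)  # uint8 -> float32 in [0, 1]
print('min max img value', tf.reduce_min(img), tf.reduce_max(img))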
I created a minimal working example of an image preprocessing step which is to be ported from Caffe v1 to TensorFlow. I am able to reproduce the steps using PIL + skimage; however, I am unable to do the same in TensorFlow. As observed, the L2 norm between the transformed image in Caffe and the one in TensorFlow is high, which is not the case for PIL + skimage. How do I reproduce the steps the image undergoes in Caffe, or in the PIL method, using TensorFlow?
import tensorflow as tf
import numpy as np
from PIL import Image
import caffe
from skimage.transform import resize
import requests
image_url = 'https://tinyjpg.com/images/social/website.jpg'
TEST_IMAGE = 'test_image.jpg'
DATA_LAYER = 'data_p'
MEAN = np.array([131.26315308, 140.62084961, 142.71440125], dtype=np.float32)
img_data = requests.get(image_url).content
with open(TEST_IMAGE, 'wb') as handler:
    handler.write(img_data)
def create_transformer():
    transformer = caffe.io.Transformer({DATA_LAYER: (1, 3, 224, 224)})
    transformer.set_transpose(DATA_LAYER, (2, 0, 1))
    transformer.set_channel_swap(DATA_LAYER, (2, 1, 0))
    transformer.set_mean(DATA_LAYER, MEAN)
    transformer.set_raw_scale(DATA_LAYER, 255)
    return transformer
def transform_image_original(test_image):
    '''
    Preprocesses the image with a caffe.io.Transformer
    '''
    t = create_transformer()
    image_data = Image.fromarray(np.uint8(caffe.io.load_image(test_image) * 255))
    input_image = np.array(image_data) / 255.0
    transformed_image = t.preprocess(DATA_LAYER, input_image)
    return transformed_image
def _resize(im, new_dims, interp_order=1):
    im_min, im_max = im.min(), im.max()
    if im_max > im_min:
        im_std = (im - im_min) / (im_max - im_min)
        resized_std = resize(im_std, new_dims, order=interp_order, mode='constant')
        resized_im = resized_std * (im_max - im_min) + im_min
    else:  # constant image: resizing preserves the constant value
        resized_im = np.full(tuple(new_dims) + im.shape[2:], im_min, dtype=np.float32)
    return resized_im
def preprocess_image(TEST_IMAGE, height=224, width=224):
    '''
    Replicates the Caffe transformation using PIL and skimage.transform
    '''
    with open(TEST_IMAGE, 'rb') as f:
        image = Image.open(f)
        image = image.convert('RGB')
    image = np.array(image, np.float32) / 255.0
    image = _resize(image, (224, 224))
    image = np.array(image, np.float32) * 255.0
    import scipy.misc
    scipy.misc.imsave('pil_file.jpg', image)
    image = np.transpose(image, (2, 0, 1))
    image = image[::-1, ...]  # convert RGB to BGR
    image = image - MEAN.reshape((3, 1, 1))
    return image
def tf_preprocess_image(TEST_IMAGE, height=224, width=224):
    '''
    Preprocesses an image in tensorflow
    '''
    image_string = tf.read_file(TEST_IMAGE)
    image = tf.image.decode_jpeg(image_string, channels=3, dct_method='INTEGER_ACCURATE',
                                 fancy_upscaling=False, acceptable_fraction=1,
                                 try_recover_truncated=True)
    image = tf.to_float(image) / 255
    # min-max normalize to [0, 1]
    image = tf.div(
        tf.subtract(image, tf.reduce_min(image)),
        tf.subtract(tf.reduce_max(image), tf.reduce_min(image))
    )
    if height and width:
        # Resize the image to the specified height and width.
        image = tf.expand_dims(image, 0)
        image = tf.image.resize_bilinear(image, [height, width], align_corners=False)
        image = tf.squeeze(image, [0])
    # undo the min-max normalization (note: min/max are recomputed here,
    # after the resize, unlike in _resize above)
    image = tf.add(
        tf.multiply(image, tf.subtract(tf.reduce_max(image), tf.reduce_min(image))),
        tf.reduce_min(image)
    )
    image = tf.to_float(image) * 255
    # RGB to BGR using a strided slice
    image = image[..., ::-1]
    # Channels last to channels first
    image = tf.transpose(image, [2, 0, 1])
    # Mean subtraction
    image = tf.subtract(image, MEAN.reshape(3, 1, 1))
    return image
print('Preprocessing test image using Caffe...')
image = transform_image_original(TEST_IMAGE)
print('Preprocessing test image using PIL + skimage.transform ...')
image2 = preprocess_image(TEST_IMAGE)
print('Preprocessing test image using tensorflow')
with tf.Session() as sess:
    tf_image3 = tf_preprocess_image(TEST_IMAGE)
    image3 = tf_image3.eval()
print('L2 norm between Caffe transformation and PIL + skimage:', np.linalg.norm(image - image2))
print('L2 norm between PIL + skimage transformation and tf:', np.linalg.norm(image2 - image3))
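One concrete difference worth isolating (my observation from the code above, not a verified fix): _resize() un-normalizes with the min/max captured before resizing, while tf_preprocess_image recomputes them after tf.image.resize_bilinear. A sketch of the TF resize step rewritten to mirror _resize, in the same TF 1.x style as the question:

im_min = tf.reduce_min(image)
im_max = tf.reduce_max(image)
image_std = (image - im_min) / (im_max - im_min)
image_std = tf.expand_dims(image_std, 0)
image_std = tf.image.resize_bilinear(image_std, [height, width], align_corners=False)
image_std = tf.squeeze(image_std, [0])
image = image_std * (im_max - im_min) + im_min

Any remaining gap would then come from the interpolation itself: skimage.transform.resize can apply Gaussian anti-aliasing when downscaling, whereas tf.image.resize_bilinear does not.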