Image transformation in Tensorflow vs Caffe vs PIL + skimage - numpy

I created a minimum working example of an image preprocessing step which is to be ported over from Caffe v.1 to tensorflow. I am able to reproduce the steps using PIL + skimage however, I am unable to do the same in tensorflow. As observed, the L2 norm between the transformed image in Caffe and tensorflow is high which is not the case for the PIL + skimage. How do we reproduce the steps the image underwent in Caffe or PIL methods using tensorflow?
import tensorflow as tf
import numpy as np
from PIL import Image
import caffe
from skimage.transform import resize
import requests
image_url = 'https://tinyjpg.com/images/social/website.jpg'
TEST_IMAGE = 'test_image.jpg'
DATA_LAYER = 'data_p'
MEAN = np.array([131.26315308, 140.62084961, 142.71440125], dtype=np.float32)
img_data = requests.get(image_url).content
with open(TEST_IMAGE, 'wb') as handler:
handler.write(img_data)
def create_transformer():
transformer = caffe.io.Transformer({DATA_LAYER: (1, 3, 224, 224)})
transformer.set_transpose(DATA_LAYER, (2,0,1))
transformer.set_channel_swap(DATA_LAYER,(2,1,0))
transformer.set_mean(DATA_LAYER, MEAN)
transformer.set_raw_scale(DATA_LAYER, 255)
return transformer
def transform_image_original(test_image):
'''
Creates a caffe.io.Transformer
'''
t = create_transformer()
image_data = Image.fromarray(np.uint8(caffe.io.load_image(TEST_IMAGE) * 255))
input_image = np.array(image_data) / 255.0
transformed_image = t.preprocess(DATA_LAYER, input_image)
return transformed_image
def _resize(im, new_dims,interp_order=1):
im_min, im_max = im.min(), im.max()
if im_max > im_min:
im_std = (im - im_min) / (im_max - im_min)
resized_std = resize(im_std, new_dims, order=interp_order, mode='constant')
resized_im = resized_std * (im_max - im_min) + im_min
return resized_im
def preprocess_image(TEST_IMAGE, height=224, width=224):
'''
replicates the caffe tranformation using PIL and skimage.transform
'''
with open(TEST_IMAGE, 'rb') as f:
image = Image.open(f)
image = image.convert('RGB')
image = np.array(image, np.float32) / 255.0
image = _resize(image, (224, 224))
image = np.array(image, np.float32) * 255.0
import scipy.misc
scipy.misc.imsave('pil_file.jpg', image)
image = np.transpose(image, (2, 0, 1))
image = image[::-1, ...] # convert RGB to BGR
image = image - MEAN.reshape((3, 1, 1))
return image
def tf_preprocess_image(TEST_IMAGE, height=224, width=224):
'''
preprocessing an image in tensorflow
'''
image_string = tf.read_file(TEST_IMAGE)
image = tf.image.decode_jpeg(image_string, channels=3, dct_method='INTEGER_ACCURATE',
fancy_upscaling=False, acceptable_fraction=1, try_recover_truncated=True)
image = tf.to_float(image) / 255
image = tf.div(
tf.subtract(
image,
tf.reduce_min(image)
),
tf.subtract(
tf.reduce_max(image),
tf.reduce_min(image)
)
)
if height and width:
# Resize the image to the specified height and width.
image = tf.expand_dims(image, 0)
image = tf.image.resize_bilinear(image, [height, width],
align_corners=False)
image = tf.squeeze(image, [0])
image = tf.add(
tf.multiply(
image,
tf.subtract(
tf.reduce_max(image),
tf.reduce_min(image)
)
),
tf.reduce_min(image)
)
image = tf.to_float(image) * 255
# RGB to BGR using strided slice
image = image[..., ::-1]
# Channel last to channel first
image = tf.transpose(image, [2, 0, 1])
# Mean subtraction
image = tf.subtract(image, MEAN.reshape(3,1,1))
return image
print('Preprocessing test image using Caffe...')
image = transform_image_original(TEST_IMAGE)
print('Preprocessing test image using PIL + skimage.transform ...')
image2 = preprocess_image(TEST_IMAGE)
print('Preprocessing test image using tensorflow')
with tf.Session() as sess:
tf_image3 = tf_preprocess_image(TEST_IMAGE)
image3 = tf_image3.eval()
print('L2 norm between caffe transformation and PIL + skimage', np.linalg.norm(image - image2)) # L2 norm
print('L2 norm between caffe transformation and tf', np.linalg.norm(image2 - image3))

Related

Tensorflow Image scaling and rescaling inconsistency when displaying loaded images using matplotlib

I am looking at this code and I cant seen to figure out why the image rescaling is not wrong.
import tensorflow as tf
import matplotlib.pyplot as plt
AUTOTUNE = tf.data.experimental.AUTOTUNE
IMAGE_SIZE = [256, 256]
files = ["<add_an_image_location>"]
def decode_image(image):
image = tf.io.read_file(image)
image = tf.io.decode_png(image, channels=3)
image = tf.image.resize(image, [*IMAGE_SIZE])
image = (tf.cast(image, tf.float32) / 127.5) - 1 # this I understand, the image is being converted to be in range [-1, 1]
image = tf.reshape(image, [*IMAGE_SIZE, 3])
return image
def load_dataset(filenames, labeled=True, ordered=False):
dataset = tf.data.Dataset.from_tensor_slices(filenames)
dataset = dataset.map(decode_image, num_parallel_calls=AUTOTUNE)
return dataset
ds = load_dataset(files, labeled=True).batch(1)
example = next(iter(ds))
plt.imshow(example[0] * 0.5 + 0.5) # this I cant seem to figure out!
I realise and understand this would work plt.imshow(((example[0] + 1) * 127.5)).numpy().astype('uint'))
Cant seem to understand why this works plt.imshow(example[0] * 0.5 + 0.5)
Similarly -
(tf.cast(image, tf.float32) / 255) - 0.5 # scaling so
plt.imshow(example[0] + 0.5) # and rescaling so works
I have a feeling that I am missing something silly.. but.. please someone help me!! :D

Testing image classification model on new images

I am still a bit new to deep learning.
def predictionrelease(preds):
arr=[]
for i in range(0,len(preds)):
ans=np.argmax(preds[i])
arr.append(ans)
len(arr)
return arr
dir_path = 'predict'
for i in os.listdir(dir_path):
img = image.load_img(dir_path+ '\\' + i, target_size = (200,200,3))
plt.imshow(img)
plt.show()
X = image.img_to_array(img)
X = np.expand_dims(X, axis = 0)
images = np.vstack([X])
val = predictionrelease(model.predict(images))
print(val)
I was able to train a model on image classification. Now i try to predict new images in a single file using the model, but it's end up predicting only one of the images, whereas i want it to give prediction for all the of the images in the file. I iterated over the images, but it's seems not to be working. There is the code:
As a workaround you can use the code below to predict on all images present in a folder.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os
dir_path = path_to_the_folder
images=[]
for i in os.listdir(dir_path):
img = tf.keras.utils.load_img(dir_path + i, target_size = (200,200,3))
plt.imshow(img)
plt.show()
X = tf.keras.utils.img_to_array(img)
X = np.expand_dims(X, axis = 0)
images.append(X)
arr=[]
pred=[]
for i in range(len(images)):
pred.append(model.predict(images[i],))
ans=np.argmax(pred)
arr.append(ans)
print(arr)

Different results in tensorflow prediction

I cannot understand why the following codes gives different results. I'm printing the first 3 components of the prediction array to compare results. my_features and feat have totally different results, but they should be the same, given the model and the data are the same. There should be something wrong in the loading and image preprocessing, but I cannot find it. Any help will be appreciated.
import tensorflow as tf
import os
import numpy as np
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import MobileNetV3Small
from tensorflow.keras.applications.imagenet_utils import preprocess_input
model= MobileNetV3Small(weights='imagenet', include_top=False, pooling='avg')
DatasetPath= "DB"
imagePathList= sorted(os.listdir(DatasetPath))
imagePathList= [os.path.join(DatasetPath, imagePath) for imagePath in imagePathList]
def read_image(filename):
image_string = tf.io.read_file(filename)
image = tf.image.decode_jpeg(image_string, channels=3)
image = tf.image.convert_image_dtype(image, tf.float32)
image = tf.image.resize(image, [224,224])
image = tf.keras.applications.mobilenet_v3.preprocess_input(image)
return image
ds_imagePathList= tf.data.Dataset.from_tensor_slices(imagePathList)
dataset = ds_imagePathList.map(read_image, num_parallel_calls=tf.data.AUTOTUNE)
dataset = dataset.batch(32, drop_remainder=False)
dataset = dataset.prefetch(tf.data.AUTOTUNE)
my_features = model.predict(dataset)
my_features[0][:3]
Second snippet
def loadProcessedImage(path):
#img = image.load_img(path, target_size=model.input_shape[1:3])
img = image.load_img(path, target_size= (224,224,3))
imgP = image.img_to_array(img)
imgP = np.expand_dims(imgP, axis=0)
imgP = preprocess_input(imgP)
return img, imgP
img, x = loadProcessedImage(imagePathList[0])
feat = model.predict(x)
feat = feat.flatten()
feat[:3]
The problem is related to the image resize. In the second snippet there is a call to load_img which internally uses pillow to load and resize the image. The problem is that tf.image.resize is not correct see here, and even this a 2018 blog post, the problem is still there

OpenCV and Tensorflow Colour issue (Most probably due to channels maybe?)

I was using this pytorch model for real-ESRGAN. It was giving good results:
Now I converted this model to tensorflow one. Now when I use this, I am getting image super resoluted (I reached to that conclusion due to image size of output) but its color channels are in very weird condition:
I am using opencv to take input and then model to process it. I feel issue is with BGR and RGB of OpenCV and Tensorflow but using cv2.COLOR_BGRTORGB not helping.
Any idea how to solve this?
This is my code:
from pyexpat import model
import tensorflow as tf
import os.path as osp
import glob
import cv2
import numpy as np
import torch
test_img_folder = './images/*'
model = tf.saved_model.load("./RealESRGAN_1/")
idx = 0
for path in glob.glob(test_img_folder):
idx += 1
base = osp.splitext(osp.basename(path))[0]
print(idx, base)
# read images
img = cv2.imread(path, cv2.IMREAD_COLOR)
print(img.shape)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
cv2.imshow('Input',img)
cv2.waitKey(0)
img = np.transpose(img, (2,0,1))
img = np.expand_dims(img, axis=0)
img = tf.dtypes.cast(img, tf.float32)
with torch.no_grad():
output = model(x = img)
output = output['sum'].numpy()
output = output[0, :, :, :]
output = np.transpose(output, (1,2,0))
cv2.imwrite('./results/{:s}_rlt.png'.format(base), output)

Tensor format issue from converting Pytorch -> Onnx -> Tensorflow

I have an issue with Tensorflow model that is converted from Pytorch -> Onnx -> Tensorflow. The issue is the converted Tensorflow model expects the input in Pytorch format that is (batch size, number channels, height, width) but not in Tensorflow format (batch size, height, width, number channel). Therefore, I cannot use the model to process further with Vitis AI.
So I would like to ask is there is any ways to convert this Pytorch input format to Tensorflow format by using tools from Onnx, Tensorflow 1, or others?
My code is as below:
Pytorch -> Onnx
from hardnet import hardnet
import torch
import onnx
ckpt = torch.load('../hardnet.pth')
model_state_dict = ckpt['model_state_dict']
optimizer_state_dict = ckpt['optimizer_state_dict']
model = hardnet(11)
model.load_state_dict(model_state_dict)
model.eval()
dummy_input = torch.randn(1, 3, 1080, 1920)
input_names = ['input0']
output_names = ['output0']
output_file = 'hardnet.onnx'
torch.onnx.export(model, dummy_input, output_file, verbose=True,
input_names=input_names, output_names=output_names,
opset_version=11, keep_initializers_as_inputs=True)
onnx_model = onnx.load(output_file)
onnx.checker.check_model(onnx_model)
print('Passed Onnx')
Onnx -> Tensorflow 1 (using Tensorflow 1.15)
import cv2
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import onnx
from onnx_tf.backend import prepare
output_file = 'hardnet.onnx'
onnx_model = onnx.load(output_file)
output = prepare(onnx_model)
output.export_graph('hardnet.pb')
tf.compat.v1.disable_eager_execution()
def load_pb(path_to_pb: str):
"""From: https://stackoverflow.com/questions/51278213/what-is-the-use-of-a-pb-file-in-tensorflow-and-how-does-it-work
"""
with tf.gfile.GFile(path_to_pb, "rb") as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
with tf.Graph().as_default() as graph:
tf.import_graph_def(graph_def, name='')
return graph
graph = load_pb('hardnet.pb')
input = graph.get_tensor_by_name('input0:0')
output = graph.get_tensor_by_name('output0:0')
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
img = cv2.imread('train_0.jpg', cv2.IMREAD_COLOR)
img = cv2.resize(img, (1920, 1080))
img = img/255
img = img - mean
img = img/std
img = np.expand_dims(img, -1)
# To Pytorch format.
img = np.transpose(img, (3, 2, 0, 1))
img = img
with tf.Session(graph=graph) as sess:
pred = sess.run(output, {input: img})
You could wrap your Pytorch model into another one that would do the transpose you want to have in TensorFlow. See the following example:
Let's say you have the following toy NN:
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.rnn = nn.LSTM(10, 20, 2)
def forward(self, x):
h0 = torch.zeros(2, 3, 20)
c0 = torch.zeros(2, 3, 20)
return self.rnn(x, (h0, c0))
the exemplary pytorch/tensorflow input shape would be :
>> pytorch_input = torch.randn(5, 3, 10)
>> tf_input = torch.transpose(pytorch_input, 1, 2)
>> print("PyTorch input shape: ", pytorch_input.shape)
>> print("TensorFlow input shape: ", tf_input.shape)
PyTorch input shape: torch.Size([5, 3, 10])
TensorFlow input shape: torch.Size([5, 10, 3])
Now, the wrapper which will first transpose input and then pass transposed input to some model:
class NetTensorFlowWrapper(nn.Module):
def __init__(self, main_module: nn.Module):
super(NetTensorFlowWrapper, self).__init__()
self.main_module = main_module
def forward(self, x):
x = torch.transpose(x, 1, 2)
return self.main_module(x)
Then, this is possible:
net = Net()
net_wrapper = NetTensorFlowWrapper(net)
net(pytorch_input)
net_wrapper(tf_input)
and then, when you finally save your models like you did previously via torch.onnx.export and read their graph via onnx package (not torch.onnx) you will have...
for Net- input 5x3x10 and no transpose layer
graph torch-jit-export (
%input0[FLOAT, 5x3x10]
{
%76 = Shape(%input0)
%77 = Constant[value = <Scalar Tensor []>]()
for NetTensorFlowWrapper- input 5x10x3 and transpose layer
graph torch-jit-export (
%input0[FLOAT, 5x10x3]
{
%9 = Transpose[perm = [0, 2, 1]](%input0)
%77 = Shape(%9)
%78 = Constant[value = <Scalar Tensor []>]()
...