How to multiply input images with mask in tensorflow?

I want to multiply every input image with a mask of the same size as the input image. How would I do that in tensorflow?
My image reading function looks like this so far:
img_contents = tf.read_file(input_queue[0])
label_contents = tf.read_file(input_queue[1])
img = tf.image.decode_png(img_contents, channels=3)
label = tf.image.decode_png(label_contents, channels=1)
# Now I want to do something like this?
mask = tf.constant(1.0, dtype=tf.float32, shape=img.shape)
img_masked = tf.multiply(img,mask)
Is that possible?
I'm not sure whether img is already a tensor object that I can use with that function. I'm new to tensorflow...

Here is code that works well for me. I'm running it in a Jupyter notebook.
%matplotlib inline
import tensorflow as tf
from matplotlib.image import imread
import matplotlib.pyplot as plt
# Loading test image from the local filesystem
x = tf.Variable(imread("test_img.jpg"),dtype='float32')
x_mask = tf.Variable(imread("test_mask.jpg"),dtype='float32')
img_mult = tf.multiply(x,x_mask)
plt.imshow(imread("test_img.jpg"))
plt.show()
plt.imshow(imread("test_mask.jpg"))
plt.show()
sess = tf.Session()
sess.run(tf.global_variables_initializer())
res = sess.run(img_mult)
plt.imshow(res)
plt.show()
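To apply the mask inside the reading function from the question, the main thing to watch is dtypes: decode_png returns uint8, so cast before multiplying. A minimal sketch using the question's variables (and reusing the 1-channel label as the mask, which broadcasts against the 3-channel image):
img = tf.image.decode_png(img_contents, channels=3)
mask = tf.image.decode_png(label_contents, channels=1)
# cast both to float32 so tf.multiply gets matching dtypes
img = tf.cast(img, tf.float32)
mask = tf.cast(mask, tf.float32)
# a (H, W, 1) mask broadcasts against a (H, W, 3) image
img_masked = tf.multiply(img, mask)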
Also, here is a good YouTube tutorial covering image manipulation with TF: https://www.youtube.com/watch?v=bvHgESVuS6Q&t=447s

Related

Preprocessing layers with seed not producing the same data augmentation for images and masks

I'm trying to create a simple preprocessing augmentation layer, following this Tensorflow tutorial. I created this 'simple' example that shows the problem I'm having.
Even though I'm initializing the augmentation class with a seed, the operations applied to the images and to the corresponding masks are not always the same.
What am I doing wrong?
Note: tf v2.10.0
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import skimage
import rasterio as rio
def normalize(array: np.ndarray):
    """Normalise the image to give a meaningful output."""
    array_min, array_max = array.min(), array.max()
    return (array - array_min) / (array_max - array_min)
# field
im = rio.open('penguins.tif')
fields = np.zeros((1,im.shape[0],im.shape[1],3))
fields[0,:,:,0] = normalize(im.read(1))
fields[0,:,:,1] = normalize(im.read(2))
fields[0,:,:,2] = normalize(im.read(3))
# mask is a simple contour
masks = skimage.color.rgb2gray(skimage.filters.sobel(fields[0]))
masks = np.expand_dims(masks, [0,3])
In this case the dataset is only one image; we can use this function to visualize the field and the mask.
def show(field: np.ndarray, mask: np.ndarray):
    """Show the field and corresponding mask."""
    fig = plt.figure(figsize=(8, 6))
    ax1 = fig.add_subplot(121)
    ax2 = fig.add_subplot(122)
    ax1.imshow(field[:, :, :3])
    ax2.imshow(mask, cmap='binary')
    plt.tight_layout()
    plt.show()
show(fields[0], masks[0])
Alright, now I use the example from the tutorial, which randomly flips the image and the mask horizontally.
class Augment(tf.keras.layers.Layer):
    def __init__(self, seed=42):
        super().__init__()
        # both use the same seed, so they'll make the same random changes
        self.augment_inputs = tf.keras.layers.RandomFlip(mode="horizontal", seed=seed)
        self.augment_labels = tf.keras.layers.RandomFlip(mode="horizontal", seed=seed)

    def call(self, inputs, labels):
        inputs = self.augment_inputs(inputs)
        labels = self.augment_labels(labels)
        return inputs, labels
Now if I run the following multiple times, I eventually get opposite flips on the field and mask.
# Create a tf.data.Dataset
ds = tf.data.Dataset.from_tensor_slices((fields, masks))
ds2 = ds.map(Augment())
for f, m in ds2.take(1):
    show(f, m)
I would expect the image and its mask to be flipped the same way, since I set the seed in the Augment class as suggested in the Tensorflow tutorial.
Augmentation can be done on the image and mask concatenated along the channel axis into a single array, recovering the image and label afterwards, as shown below (note that the split indices assume a 3-channel image and a 1-channel mask, as in the question):
class Augment(tf.keras.layers.Layer):
    def __init__(self):
        super().__init__()
        # a single layer transforms the concatenated array, so the image and
        # mask always receive the same random change
        self.augment_inputs = tf.keras.layers.RandomRotation(0.3)

    def call(self, inputs, labels):
        # concatenate along the channel axis, augment, then split again
        output = self.augment_inputs(tf.concat([inputs, labels], -1))
        inputs = output[:, :, 0:3]  # the three image channels
        labels = output[:, :, 3:]   # the mask channel
        return inputs, labels
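A quick check, reusing ds and show from the question, confirms the field and mask now receive the same transformation:
ds2 = ds.map(Augment())
for f, m in ds2.take(1):
    show(f, m)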

Testing image classification model on new images

I am still a bit new to deep learning. I was able to train an image classification model. Now I'm trying to use the model to predict new images in a single folder, but it ends up predicting only one of the images, whereas I want predictions for all of the images in the folder. I iterated over the images, but it doesn't seem to work. Here is the code:
def predictionrelease(preds):
    arr = []
    for i in range(0, len(preds)):
        ans = np.argmax(preds[i])
        arr.append(ans)
    len(arr)
    return arr

dir_path = 'predict'
for i in os.listdir(dir_path):
    img = image.load_img(dir_path + '\\' + i, target_size=(200, 200, 3))
    plt.imshow(img)
    plt.show()
    X = image.img_to_array(img)
    X = np.expand_dims(X, axis=0)
    images = np.vstack([X])
val = predictionrelease(model.predict(images))
print(val)
As a workaround, you can use the code below to predict on all images present in a folder.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os

dir_path = path_to_the_folder
images = []
for i in os.listdir(dir_path):
    # join the folder and filename safely
    img = tf.keras.utils.load_img(os.path.join(dir_path, i), target_size=(200, 200, 3))
    plt.imshow(img)
    plt.show()
    X = tf.keras.utils.img_to_array(img)
    X = np.expand_dims(X, axis=0)
    images.append(X)

arr = []
pred = []
for i in range(len(images)):
    pred.append(model.predict(images[i]))
    ans = np.argmax(pred[i])  # argmax of this image's prediction
    arr.append(ans)
print(arr)
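Since all the images are resized to the same shape, an equivalent sketch is to stack them into one batch and call predict once, which is usually faster:
batch = np.vstack(images)               # shape: (num_images, 200, 200, 3)
preds = model.predict(batch)
arr = list(np.argmax(preds, axis=1))    # one class index per image
print(arr)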

Different results in tensorflow prediction

I cannot understand why the following code gives different results. I'm printing the first 3 components of the prediction array to compare results. my_features and feat have totally different values, but they should be the same, given that the model and the data are the same. There must be something wrong in the loading and image preprocessing, but I cannot find it. Any help will be appreciated.
import tensorflow as tf
import os
import numpy as np
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import MobileNetV3Small
from tensorflow.keras.applications.imagenet_utils import preprocess_input
model= MobileNetV3Small(weights='imagenet', include_top=False, pooling='avg')
DatasetPath= "DB"
imagePathList= sorted(os.listdir(DatasetPath))
imagePathList= [os.path.join(DatasetPath, imagePath) for imagePath in imagePathList]
def read_image(filename):
    image_string = tf.io.read_file(filename)
    image = tf.image.decode_jpeg(image_string, channels=3)
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.resize(image, [224, 224])
    image = tf.keras.applications.mobilenet_v3.preprocess_input(image)
    return image
ds_imagePathList= tf.data.Dataset.from_tensor_slices(imagePathList)
dataset = ds_imagePathList.map(read_image, num_parallel_calls=tf.data.AUTOTUNE)
dataset = dataset.batch(32, drop_remainder=False)
dataset = dataset.prefetch(tf.data.AUTOTUNE)
my_features = model.predict(dataset)
my_features[0][:3]
Second snippet
def loadProcessedImage(path):
    # img = image.load_img(path, target_size=model.input_shape[1:3])
    img = image.load_img(path, target_size=(224, 224, 3))
    imgP = image.img_to_array(img)
    imgP = np.expand_dims(imgP, axis=0)
    imgP = preprocess_input(imgP)
    return img, imgP
img, x = loadProcessedImage(imagePathList[0])
feat = model.predict(x)
feat = feat.flatten()
feat[:3]
The problem is related to the image resizing. In the second snippet, load_img internally uses Pillow to load and resize the image, whereas the first snippet resizes with tf.image.resize, which is known not to match other resize implementations (see here); even though that blog post is from 2018, the problem is still there.
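If the goal is simply to make the two paths agree, one option (a sketch, reusing the question's read_image so both paths share the same decode, resize, and preprocessing steps) is:
# run the single image through the exact same pipeline as the dataset
single = tf.expand_dims(read_image(imagePathList[0]), axis=0)
feat = model.predict(single).flatten()
feat[:3]  # should now match my_features[0][:3]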

OpenCV and Tensorflow Colour issue (Most probably due to channels maybe?)

I was using this PyTorch model for Real-ESRGAN. It was giving good results.
Then I converted the model to a TensorFlow one. Now when I use it, the image is super-resolved (I concluded that from the size of the output image), but its colour channels are in a very weird state.
I am using OpenCV to take the input and the model to process it. I suspect the issue is OpenCV's BGR ordering versus TensorFlow's RGB, but using cv2.COLOR_BGR2RGB is not helping.
Any idea how to solve this?
This is my code:
import tensorflow as tf
import os.path as osp
import glob
import cv2
import numpy as np
import torch

test_img_folder = './images/*'
model = tf.saved_model.load("./RealESRGAN_1/")

idx = 0
for path in glob.glob(test_img_folder):
    idx += 1
    base = osp.splitext(osp.basename(path))[0]
    print(idx, base)
    # read images
    img = cv2.imread(path, cv2.IMREAD_COLOR)
    print(img.shape)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    cv2.imshow('Input', img)
    cv2.waitKey(0)
    img = np.transpose(img, (2, 0, 1))
    img = np.expand_dims(img, axis=0)
    img = tf.dtypes.cast(img, tf.float32)
    with torch.no_grad():
        output = model(x=img)
    output = output['sum'].numpy()
    output = output[0, :, :, :]
    output = np.transpose(output, (1, 2, 0))
    cv2.imwrite('./results/{:s}_rlt.png'.format(base), output)
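A likely culprit, beyond the BGR/RGB swap on input: cv2.imwrite expects uint8 BGR data, so the model's RGB float output should be clipped, cast, and converted back before writing. A sketch (not from the original thread; it assumes the model outputs RGB values in the 0-255 range):
output = np.transpose(output, (1, 2, 0))
output = np.clip(output, 0, 255).astype(np.uint8)  # assumes a 0-255 output range
output = cv2.cvtColor(output, cv2.COLOR_RGB2BGR)   # back to OpenCV's BGR order
cv2.imwrite('./results/{:s}_rlt.png'.format(base), output)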

How do I draw a resized image in TensorFlow?

It seems like images in TensorFlow get transformed to a different kind of image coordinate system after any transformation (e.g. resize) is applied. Drawing the image results in this:
%matplotlib inline
import tensorflow as tf
from matplotlib import pyplot as plt
with tf.device("/cpu:0"):
    file_contents = tf.read_file('any_image.png')
    image = tf.image.decode_png(file_contents)
    image.set_shape([375, 1242, 3])
    image = tf.image.resize_images(image, 448, 448)

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    image_val = sess.run([image])
    plt.figure(figsize=(16, 8))
    plt.imshow(image_val[0], interpolation='nearest')
    plt.show()
    plt.close()
If I remove the resize operation, it draws the regular image. How do I get matplotlib to draw the resized image correctly, or tell Tensorflow to return an RGB image?
It turns out there is no image transformation besides the conversion from unsigned integer to float: resize_images returns float32, and matplotlib expects float images to be in the [0, 1] range, so the 0-255 float values are drawn incorrectly. Converting back to unsigned integer fixes the problem.
import numpy as np
plt.imshow(image_val[0].astype(np.uint8), interpolation='nearest')
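Alternatively, scaling the floats into [0, 1], which matplotlib also accepts for float images, works too:
plt.imshow(image_val[0] / 255.0, interpolation='nearest')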