Tensorflow Image scaling and rescaling inconsistency when displaying loaded images using matplotlib

I am looking at this code and I can't seem to figure out why the image rescaling works.
import tensorflow as tf
import matplotlib.pyplot as plt
AUTOTUNE = tf.data.experimental.AUTOTUNE
IMAGE_SIZE = [256, 256]
files = ["<add_an_image_location>"]
def decode_image(image):
    """Load a PNG file and return it as a float32 tensor scaled to [-1, 1].

    Args:
        image: Path of the PNG file to read (passed to ``tf.io.read_file``).

    Returns:
        A tensor of shape ``[*IMAGE_SIZE, 3]``.
    """
    image = tf.io.read_file(image)
    image = tf.io.decode_png(image, channels=3)
    image = tf.image.resize(image, [*IMAGE_SIZE])
    # Pixel values start in [0, 255]; dividing by 127.5 gives [0, 2] and
    # subtracting 1 shifts that to [-1, 1].
    image = (tf.cast(image, tf.float32) / 127.5) - 1
    image = tf.reshape(image, [*IMAGE_SIZE, 3])
    return image
def load_dataset(filenames, labeled=True, ordered=False):
    """Build a ``tf.data.Dataset`` of decoded images from file paths.

    Args:
        filenames: Sequence of image file paths.
        labeled: Currently unused; kept so existing callers keep working.
        ordered: Currently unused; kept so existing callers keep working.

    Returns:
        A dataset that yields one decoded image tensor per input file.
    """
    dataset = tf.data.Dataset.from_tensor_slices(filenames)
    return dataset.map(decode_image, num_parallel_calls=AUTOTUNE)
ds = load_dataset(files, labeled=True).batch(1)
example = next(iter(ds))
plt.imshow(example[0] * 0.5 + 0.5) # this I cant seem to figure out!
I realise and understand that this would work: plt.imshow(((example[0] + 1) * 127.5).numpy().astype('uint8'))
I can't seem to understand why this works: plt.imshow(example[0] * 0.5 + 0.5)
Similarly -
(tf.cast(image, tf.float32) / 255) - 0.5 # scaling so
plt.imshow(example[0] + 0.5) # and rescaling so works
I have a feeling that I am missing something silly.. but.. please someone help me!! :D


How to convert model test predictions to png

I trained model with pictures and masks. I am trying to get predicted masks.
model = load_model('unet_brain_mri_seg.hdf5', custom_objects={'dice_coef_loss': dice_coef_loss, 'iou': iou, 'dice_coef': dice_coef})
model.compile(optimizer=opt, loss=dice_coef_loss, metrics=["binary_accuracy", iou, dice_coef])
test_gen = train_generator(df2, BATCH_SIZE,
target_size=(im_height, im_width))
# model.evaluate returns the loss followed by the metrics in the order they
# were passed to compile(): loss, binary_accuracy, iou, dice_coef.
results = model.evaluate(test_gen, steps=len(df2) / BATCH_SIZE)
print("Test loss: ", results[0])
print("Test IOU: ", results[2])
print("Test Dice Coefficent: ", results[3])
from PIL import Image
for i in range(30):
img = cv2.imread(df2['filename'].iloc[index])
img = cv2.resize(img ,(im_height, im_width))
img = img / 255
img = img[np.newaxis, :, :, :]
plt.title('Original Image')
plt.title('Original Mask')
plt.imshow(np.squeeze(pred) > .5)
im = Image.fromarray(pred)
I am loading the model and getting test predictions, but I don't know how to convert the predictions to PNG.
Since the outputs of a UNet network are also images, you can save the output as an image like this:
pred = model.predict(img)
pred = np.squeeze(pred, axis=0) #remove batch axis (1,256,256,1) => (256,256,1)

Memory leak when using tf.data Datasets with shuffle

I have a memory leak somehow when I create my tf.data.dataset pipeline, but I don't know where.
My code works fine with ImageDataGenerator but is really slow.
Reading a lot of documentation I thought it might be albumentations.
However I now switched my transform to be entirely in tensorflow:
def _load_and_augment(encoded_image):
    """Decode one image, divide by 255, resize to 224x224 and augment it."""
    # presumably parse_image returns 8-bit pixel values, so the division
    # maps them to [0, 1] -- verify against parse_image.
    picture = parse_image(encoded_image)
    picture = tf.cast(picture, tf.float32) / 255.0
    picture = tf.image.resize(picture, size=[224, 224])
    picture = tf.image.random_flip_left_right(picture)
    picture = tf.image.random_flip_up_down(picture)
    # random_brightness adds a random delta of up to +/-0.5; the result is
    # not clipped here.
    return tf.image.random_brightness(picture, max_delta=0.5)


def map_data(inputs, outputs):
    """Preprocess and augment both image inputs of one dataset element.

    Args:
        inputs: Dict with the keys ``'image_input'`` and ``'other_input'``.
        outputs: Target value(s); passed through unchanged.

    Returns:
        A ``(inputs, outputs)`` tuple where both entries of ``inputs`` have
        been decoded, resized and randomly augmented. Each image is augmented
        independently of the other.
    """
    image = _load_and_augment(inputs['image_input'])
    other = _load_and_augment(inputs['other_input'])
    return {'image_input': image, 'other_input': other}, outputs
And I made the shuffle buffer extremely small:
#dataset = dataset.prefetch(tf.data.AUTOTUNE)
dataset = (dataset
.map(map_data, num_parallel_calls=AUTOTUNE)
Could autotune cause this?
On Colab I usually hit the RAM restart at 500 batches
I would like to use tf.data.datasets because it's really much faster if possible.
Thank you for anyone who can point me to the flaw in my code, I have always used generators and only recently made the switch.

OpenCV - convert uint8 image to float32 normalized image

I'm trying to convert parts of a Keras DarkNet code to try to make the code run faster.
Here is the code I'm trying to optimize:
model_image_size = (416, 416)
import cv2
from PIL import Image
frame = cv2.imread("test.png", cv2.IMREAD_COLOR)
im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
im = Image.fromarray(im).crop((1625, 785, 1920, 1080)) # crop ROI
resized_image = im.resize(tuple(reversed(model_image_size)), Image.BICUBIC)
image_data = np.array(resized_image, dtype='float32')
image_data /= 255.
image_data = np.expand_dims(image_data, 0) # Add batch dimension.
return image_data
This is my attempt to achieve the same output without the intermediate PIL conversion, to reduce time:
model_image_size = (416, 416)
import cv2
frame = cv2.imread("test.png", cv2.IMREAD_COLOR)
frame = frame[785:1080,1625:1920] # crop ROI
im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
resized_image = cv2.resize(im, model_image_size, interpolation = cv2.INTER_CUBIC)
resized_image /= 255.
image_data = np.expand_dims(resized_image, 0) # Add batch dimension.
return image_data
However, upon running the code, it will return:
resized_image /= 255.
TypeError: ufunc 'true_divide' output (typecode 'd') could not be coerced to provided output parameter (typecode 'B') according to the casting rule ''same_kind''
It seems like I need to change the uint8 type to float32 before normalizing but I'm not sure how to achieve it with OpenCV.
You can use resized_image.astype(np.float32) to convert the resized_image data from uint8 to float32 and then proceed with normalizing and the other steps:
frame = cv2.imread("yourfile.png")
frame = frame[200:500,400:1000] # crop ROI
im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
model_image_size = (416, 416)
resized_image = cv2.resize(im, model_image_size, interpolation = cv2.INTER_CUBIC)
resized_image = resized_image.astype(np.float32)
resized_image /= 255.
image_data = np.expand_dims(resized_image, 0) # Add batch dimension.
Your issue is that you are dividing and assigning to the same variable with /=. Numpy expects that when you do that, the array is of the same type as before, but you are dividing with a floating point number which will change the value type.
To solve this issue you can do:
resized_image = resized_image / 255.
and it should work. But note that this converts the array to dtype=float64. To convert it to float32 you can do:
resized_image = (resized_image / 255.).astype(np.float32)
The np should come from:
import numpy as np

MNIST GAN generators white area in middle surrounded by black

The following code is copied from a GAN MNIST tutorial on UDEMY. When I run the code, it converges towards creating images with a large white area in the center that is black at the sides (picture an empty filled circle against a black background). I have no idea what the problem is as I have only done what the tutorial told me to do word for word. The only difference is that I extract the MNIST data differently. Is there something about tensorflow that has changed recently?
import tensorflow as tf
import numpy as np
import gzip
from PIL import Image
import os.path
def extract_data(filename, num_images):
    """Extract MNIST images into a 4D tensor [image index, y, x, channels].

    Args:
        filename: Path to a gzip-compressed IDX3 image file.
        num_images: Number of 28x28 images to read.

    Returns:
        A float32 array of shape ``(num_images, 28, 28, 1)`` holding the raw
        pixel values in [0, 255]. No rescaling is applied here, so callers
        must normalize the data themselves.
    """
    print('Extracting', filename)
    with gzip.open(filename) as bytestream:
        # IDX3 files start with a 16-byte header (magic number, image count,
        # rows, columns); skip it so the pixel data is not shifted.
        bytestream.read(16)
        buf = bytestream.read(28 * 28 * num_images)
    data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32)
    return data.reshape(num_images, 28, 28, 1)
fname_img_train = extract_data('../Data/MNIST/train-images-idx3-ubyte.gz', 60000)
def generator(z, reuse=None):
    """Map latent vectors ``z`` to 784-value outputs squashed by tanh.

    Args:
        z: Batch of latent vectors.
        reuse: Passed to ``tf.variable_scope`` to control variable reuse.

    Returns:
        The output of the final 784-unit dense layer with tanh activation.
    """
    with tf.variable_scope('gen', reuse=reuse):
        alpha = 0.01  # slope applied to negative activations (leaky ReLU)
        hidden1 = tf.layers.dense(inputs=z, units=128)
        hidden1 = tf.maximum(alpha * hidden1, hidden1)
        # NOTE(review): the snippet read hidden2 before assigning it; a second
        # 128-unit dense layer fed by hidden1 is assumed here -- confirm.
        hidden2 = tf.layers.dense(inputs=hidden1, units=128)
        hidden2 = tf.maximum(alpha * hidden2, hidden2)
        output = tf.layers.dense(hidden2, units=784, activation=tf.nn.tanh)
        return output
def discriminator(X, reuse=None):
with tf.variable_scope('dis',reuse=reuse):
return output, logits
G = generator(z)
D_output_real, D_logits_real = discriminator(real_images)
D_output_fake, D_logits_fake = discriminator(G,reuse=True)
def loss_func(logits_in, labels_in):
    """Return the mean sigmoid cross-entropy between logits and labels.

    Args:
        logits_in: Raw (pre-sigmoid) discriminator outputs.
        labels_in: Target labels with the same shape as ``logits_in``.
    """
    return tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=logits_in, labels=labels_in))
D_real_loss = loss_func(D_logits_real,tf.ones_like(D_logits_real)*0.9)
D_fake_loss = loss_func(D_logits_fake,tf.zeros_like(D_logits_real))
D_loss = D_real_loss + D_fake_loss
G_loss = loss_func(D_logits_fake,tf.ones_like(D_logits_fake))
learning_rate = 0.001
tvars = tf.trainable_variables()
d_vars= [var for var in tvars if 'dis' in var.name]
g_vars = [var for var in tvars if 'gen' in var.name]
D_trainer = tf.train.AdamOptimizer(learning_rate).minimize(D_loss,var_list=d_vars)
G_trainer = tf.train.AdamOptimizer(learning_rate).minimize(G_loss,var_list=g_vars)
init = tf.global_variables_initializer()
def create_image(img, name):
    """Save one generator output as a 28x28 greyscale image file.

    Args:
        img: Array of 784 values in [-1, 1] (the generator's tanh range).
        name: Destination file name, e.g. ``"img0.png"``.
    """
    img = np.reshape(img, (28, 28))
    # Map [-1, 1] back to [0, 255] before converting to 8-bit pixels.
    img = np.multiply(np.divide(np.add(img, 1.0), 2.0), 255.0)
    im = Image.fromarray(img.astype('uint8'))
    # Write the image out; without this `name` is unused and nothing is saved.
    im.save(name)
with tf.Session() as sess:
for epoch in range(epochs):
for i in range(num_batches):
batch = fname_img_train[i*batch_size:((i+1)*batch_size)]
batch_images = np.reshape(batch, (batch_size,784))
batch_images = batch_images*2.0-1.0
batch_z = np.random.uniform(-1,1,size=(batch_size,100))
_ = sess.run(D_trainer, feed_dict={real_images:batch_images,z:batch_z})
_ = sess.run(G_trainer,feed_dict={z:batch_z})
print("ON EPOCH {}".format(epoch))
sample_z = np.random.uniform(-1,1,size=(batch_size,100))
gen_sample = sess.run(G,feed_dict={z:sample_z})
create_image(gen_sample[0], "img"+str(epoch)+".png")
As far as I can see, you are not normalizing the training data. Instead of using your extract_data() function, it is much easier to do the following:
from tensorflow.keras.datasets.mnist import load_data

(train_data, train_labels), _ = load_data()
# load_data() returns uint8 arrays; an in-place `/=` by a float would raise a
# casting TypeError, so convert to float32 while scaling to [0, 1].
train_data = train_data.astype('float32') / 255.
Besides, usually people sample twice from the latent space each epoch: once for the discriminator and once for the generator. Still, it did not seem to make a difference.
After implementing these changes, using a batch size of 200 and training for 100 epochs, I got the following result: gen_sample. The result is pretty bad, but it is definitely better than an "empty filled circle against a black background".
Note that the architecture of the generator and of the discriminator that you are using is very simple. From my experience, stacking some convolutional layers gives perfect results. In addition, I would not use the tf.maximum() function, since it creates discontinuities that may negatively impact the flow of the gradients.
Finally, instead of your create_image() function, I used the following:
def plot_mnist(samples, name):
fig = plt.figure(figsize=(6,6))
gs = gridspec.GridSpec(6,6)
gs.update(wspace=0.05, hspace=0.05)
for i, sample in enumerate(samples):
ax = plt.subplot(gs[i])
plt.imshow(sample.reshape(28,28), cmap='Greys_r')
There are many different ways of improving the quality of a GAN model, and the majority of those techniques can be easily found online. Please let me know if you have any specific question.

Image transformation in Tensorflow vs Caffe vs PIL + skimage

I created a minimum working example of an image preprocessing step which is to be ported over from Caffe v.1 to tensorflow. I am able to reproduce the steps using PIL + skimage; however, I am unable to do the same in tensorflow. As observed, the L2 norm between the transformed image in Caffe and tensorflow is high, which is not the case for PIL + skimage. How do we reproduce, using tensorflow, the steps the image underwent in the Caffe or PIL methods?
import tensorflow as tf
import numpy as np
from PIL import Image
import caffe
from skimage.transform import resize
import requests
image_url = 'https://tinyjpg.com/images/social/website.jpg'
TEST_IMAGE = 'test_image.jpg'
DATA_LAYER = 'data_p'
MEAN = np.array([131.26315308, 140.62084961, 142.71440125], dtype=np.float32)
img_data = requests.get(image_url).content
with open(TEST_IMAGE, 'wb') as handler:
def create_transformer():
transformer = caffe.io.Transformer({DATA_LAYER: (1, 3, 224, 224)})
transformer.set_transpose(DATA_LAYER, (2,0,1))
transformer.set_mean(DATA_LAYER, MEAN)
transformer.set_raw_scale(DATA_LAYER, 255)
return transformer
def transform_image_original(test_image):
Creates a caffe.io.Transformer
t = create_transformer()
image_data = Image.fromarray(np.uint8(caffe.io.load_image(TEST_IMAGE) * 255))
input_image = np.array(image_data) / 255.0
transformed_image = t.preprocess(DATA_LAYER, input_image)
return transformed_image
def _resize(im, new_dims,interp_order=1):
im_min, im_max = im.min(), im.max()
if im_max > im_min:
im_std = (im - im_min) / (im_max - im_min)
resized_std = resize(im_std, new_dims, order=interp_order, mode='constant')
resized_im = resized_std * (im_max - im_min) + im_min
return resized_im
def preprocess_image(TEST_IMAGE, height=224, width=224):
replicates the caffe tranformation using PIL and skimage.transform
with open(TEST_IMAGE, 'rb') as f:
image = Image.open(f)
image = image.convert('RGB')
image = np.array(image, np.float32) / 255.0
image = _resize(image, (224, 224))
image = np.array(image, np.float32) * 255.0
import scipy.misc
scipy.misc.imsave('pil_file.jpg', image)
image = np.transpose(image, (2, 0, 1))
image = image[::-1, ...] # convert RGB to BGR
image = image - MEAN.reshape((3, 1, 1))
return image
def tf_preprocess_image(TEST_IMAGE, height=224, width=224):
preprocessing an image in tensorflow
image_string = tf.read_file(TEST_IMAGE)
image = tf.image.decode_jpeg(image_string, channels=3, dct_method='INTEGER_ACCURATE',
fancy_upscaling=False, acceptable_fraction=1, try_recover_truncated=True)
image = tf.to_float(image) / 255
image = tf.div(
if height and width:
# Resize the image to the specified height and width.
image = tf.expand_dims(image, 0)
image = tf.image.resize_bilinear(image, [height, width],
image = tf.squeeze(image, [0])
image = tf.add(
image = tf.to_float(image) * 255
# RGB to BGR using strided slice
image = image[..., ::-1]
# Channel last to channel first
image = tf.transpose(image, [2, 0, 1])
# Mean subtraction
image = tf.subtract(image, MEAN.reshape(3,1,1))
return image
print('Preprocessing test image using Caffe...')
image = transform_image_original(TEST_IMAGE)
print('Preprocessing test image using PIL + skimage.transform ...')
image2 = preprocess_image(TEST_IMAGE)
print('Preprocessing test image using tensorflow')
with tf.Session() as sess:
    # eval() needs a default session, so it must run inside the `with` block.
    tf_image3 = tf_preprocess_image(TEST_IMAGE)
    image3 = tf_image3.eval()
print('L2 norm between caffe transformation and PIL + skimage', np.linalg.norm(image - image2)) # L2 norm
# Compare against the Caffe result (image), as the label says; the snippet
# compared the PIL + skimage result (image2) with tensorflow instead.
print('L2 norm between caffe transformation and tf', np.linalg.norm(image - image3))