How to convert model test predictions to png - tensorflow

I trained model with pictures and masks. I am trying to get predicted masks.
model = load_model('unet_brain_mri_seg.hdf5', custom_objects={'dice_coef_loss': dice_coef_loss, 'iou': iou, 'dice_coef': dice_coef})
model.compile(optimizer=opt, loss=dice_coef_loss, metrics=["binary_accuracy", iou, dice_coef])
test_gen = train_generator(df2, BATCH_SIZE,
dict(),
target_size=(im_height, im_width))
results = model.evaluate(test_gen, steps=len(df2) / BATCH_SIZE)
print("Test lost: ",results[0])
print("Test IOU: ",results[1])
print("Test Dice Coefficent: ",results[2])
from PIL import Image
i=0
for i in range(30):
index=np.random.randint(1,len(df2.index))
img = cv2.imread(df2['filename'].iloc[index])
img = cv2.resize(img ,(im_height, im_width))
img = img / 255
img = img[np.newaxis, :, :, :]
pred=model.predict(img)
plt.figure(figsize=(12,12))
plt.subplot(1,3,1)
plt.imshow(np.squeeze(img))
plt.title('Original Image')
plt.subplot(1,3,2)
plt.imshow(np.squeeze(cv2.imread(df2['mask'].iloc[index])))
plt.title('Original Mask')
plt.subplot(1,3,3)
plt.imshow(np.squeeze(pred) > .5)
plt.title('Prediction')
plt.show()
pred=np.asarray(pred)
im = Image.fromarray(pred)
im.save(r'C:\Users\Admin\Desktop\saglika2\Test\predicted_maske\''+str(i)+"pred_image.png")
i=i+1
I am loading model and getting test predictions but I don't know how can I convert predictions to png.

As in the UNet network, outputs are also images, you can save output as an image like this:
pred = model.predict(img)
pred = np.squeeze(pred, axis=0) #remove batch axis (1,256,256,1) => (256,256,1)
tf.keras.preprocessing.image.save_img("pred.png",pred)

Related

Difference between valid accuracy from CategoricalAccuracy() and manual prediction

I used CategoricalAccuracy() to calculate the valid_accuracy during training and reached 0.733 in a certain round. After training, I use the model weights saved under the epoch to predict the same valid data set, and the accuracy obtained is only 0.38. Why is this?
This is my training log
I only divided the data into two sets: training set and validation set.
There are 517 images in my validation set. I use this model to make manual predictions and compare them with the true labels. Only 199 images are correctly predicted. The accuracy rate is only 0.38, which is less than 0.733.
Even if it is overfitting, I think these two values should be close.
for epoch in range(EPOCHS):
step = 0
for features in train_dataset:
step += 1
images, labels = process_features(features, data_augmentation=True)
train_step(images, labels)
print("Epoch: {}/{}, step: {}/{}, loss: {:.5f}, accuracy: {:.5f}".format(epoch,
EPOCHS,
step,
math.ceil(train_count / BATCH_SIZE),
train_loss.result().numpy(),
train_accuracy.result().numpy()))
for features in valid_dataset:
valid_images, valid_labels = process_features(features, data_augmentation=False)
valid_step(valid_images, valid_labels)
print("Epoch: {}/{}, train loss: {:.5f}, train accuracy: {:.5f}, "
"valid loss: {:.5f}, valid accuracy: {:.5f}".format(epoch,
EPOCHS,
train_loss.result().numpy(),
train_accuracy.result().numpy(),
valid_loss.result().numpy(),
valid_accuracy.result().numpy()))
def process_features(features, data_augmentation):
image_raw = features['image_raw'].numpy()
image_tensor_list = []
for image in image_raw:
image_tensor = load_and_preprocess_image(image, data_augmentation=data_augmentation)
image_tensor_list.append(image_tensor)
images = tf.stack(image_tensor_list, axis=0)
labels = features['label'].numpy()
new_labels = tf.one_hot(labels,7)
return images, new_labels
def load_and_preprocess_image(image_raw, data_augmentation=False):
# decode
image_tensor = tf.io.decode_image(contents=image_raw, channels=CHANNELS, dtype=tf.dtypes.float32)
if data_augmentation:
image = tf.image.random_flip_left_right(image=image_tensor)
image = tf.image.resize_with_crop_or_pad(image=image,
target_height=int(IMAGE_HEIGHT * 1.2),
target_width=int(IMAGE_WIDTH * 1.2))
image = tf.image.random_crop(value=image, size=[IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS])
image = tf.image.random_brightness(image=image, max_delta=0.5)
else:
image = tf.image.resize(image_tensor, [IMAGE_HEIGHT, IMAGE_WIDTH])
return image
def valid_step(image_batch, label_batch):
predictions = model(image_batch, training=False)
v_loss = loss_object(label_batch, predictions)
valid_loss.update_state(values=v_loss)
valid_accuracy.update_state(y_true=label_batch, y_pred=predictions)
The following is my code to detect each picture of validation set
import tensorflow as tf
from configuration import save_model_dir, test_image_dir
from prepare_data import load_and_preprocess_image
from train import get_model
def get_single_picture_prediction(model, picture_dir):
image_tensor = load_and_preprocess_image(tf.io.read_file(filename=picture_dir), data_augmentation=False)
image = tf.expand_dims(image_tensor, axis=0)
prediction = model(image, training=False)
pred_class = tf.math.argmax(prediction, axis=-1)
return pred_class
if __name__ == '__main__':
# GPU settings
gpus = tf.config.list_physical_devices('GPU')
if gpus:
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
# load the model
model = get_model()
model.load_weights(filepath=save_model_dir+"model")
pred_class = get_single_picture_prediction(model, test_image_dir)
print(pred_class)
This is my picture

Preprocessing test images using opencv for prediction

I am working on a dataset of gray images that are saved under RGB format. I trained VGG16 on this dataset, and preprocessed them this way:
train_data_gen = ImageDataGenerator(rescale=1./255,rotation_range = 20,
width_shift_range = 0.2,
height_shift_range = 0.2,
horizontal_flip = True)
validation_data_gen = ImageDataGenerator(rescale=1./255)
train_gen= train_data_gen.flow_from_directory(trainPath,
target_size=(224, 224),
batch_size = 64,
class_mode='categorical' )
validation_gen= validation_data_gen.flow_from_directory(validationPath, target_size=(224, 224),
batch_size = 64, class_mode='categorical' )
When the training was done, both training and validation accuracy were high (92%).
In the prediction phase, I first tried to preprocess images as indicated in https://keras.io/applications/ :
img = image.load_img(img_path, target_size=(image_size,image_size))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)
However, the test accuracy was very low! around 50%. I tried the prediction again on train and validation samples and got a low accuracy, also around 50%, which means that the problem is in the prediction phase.
Instead, I preprocessed images using OpenCV library, and the accuracy was better, but still not as expected. I tried to make the prediction on train samples (where accuracy during training was 92%), and during the prediction I got 82%. Here is the code:
img = cv2.imread(imagePath)
#np.flip(img, axis=-1)
img= cv2.resize(img, (224, 224),
interpolation = cv2.INTER_AREA)
img = np.reshape(img,
(1, img.shape[0], img.shape[1], img.shape[2]))
img = img/255.
The result is the same with/without flipping the image. What's wrong with the preprocessing step?
Thanks
The error was in the interpolation parameter of resize function. It should be cv2.INTER_NEAREST instead of cv2.INTER_AREA.

Can't preprocess data before training for image segmentation

I'm trying to do some image segmentation for ocr, my mask image is a 3 classes image, like this
and my original image is a gray image like this
but when I try to fit the model I get this error
could not broadcast input array from shape (128,128,3) into shape (128,128)
here is the code I'm using to create the datasets
img_size = (128, 128)
batch_size = 32
input_img_paths = sorted(
[ os.path.join(input_dir, fname)
for fname in os.listdir(input_dir)
if fname.endswith(".jpg") ] )
target_img_paths = sorted(
[ os.path.join(target_dir, fname)
for fname in os.listdir(target_dir)
if fname.endswith(".jpg") and not fname.startswith(".") ])
class OxfordPets(keras.utils.Sequence):
"""Helper to iterate over the data (as Numpy arrays)."""
def __init__(self, batch_size, img_size, input_img_paths, target_img_paths):
self.batch_size = batch_size
self.img_size = img_size
self.input_img_paths = input_img_paths
self.target_img_paths = target_img_paths
def __len__(self):
return len(self.target_img_paths) // self.batch_size
def __getitem__(self, idx):
"""Returns tuple (input, target) correspond to batch #idx."""
i = idx * self.batch_size
batch_input_img_paths = self.input_img_paths[i : i + self.batch_size]
batch_target_img_paths = self.target_img_paths[i : i + self.batch_size]
x = np.zeros((batch_size,) + self.img_size, dtype="float32")
for j, path in enumerate(batch_input_img_paths):
img = load_img(path, target_size=self.img_size)
x[j] = img
y = np.zeros((batch_size,) + self.img_size, dtype="float32")
for j, path in enumerate(batch_target_img_paths):
img = load_img(path, target_size=self.img_size, color_mode="rgb")
y[j] = img
return x, y
val_samples = 150
random.Random(1337).shuffle(input_img_paths)
random.Random(1337).shuffle(target_img_paths)
train_input_img_paths = input_img_paths[:-val_samples]
train_target_img_paths = target_img_paths[:-val_samples]
val_input_img_paths = input_img_paths[-val_samples:]
val_target_img_paths = target_img_paths[-val_samples:]
# Instantiate data Sequences for each split
train_gen = OxfordPets(
batch_size, img_size, train_input_img_paths, train_target_img_paths
)
val_gen = OxfordPets(batch_size, img_size, val_input_img_paths, val_target_img_paths)
but when i try to fit whit this
model_history = model.fit(train_gen, epochs=30,
steps_per_epoch=50,
validation_steps=25,
validation_data=val_gen)
I get the error, I am trying to adapt this solution
https://keras.io/examples/vision/oxford_pets_image_segmentation/?fbclid=IwAR2wFYju-N0X7FUaWkhvOVaAAaVqLdOryBwg7xDC0Rji9LQ5F2jYOkeNnns
from keras
into the example of the tensorflow page
https://www.tensorflow.org/tutorials/images/segmentation
and I have the impression that the problem has something to do whit the fact that the original image is on gray scale, how can I solve this error? any advice would be great!
You should convert your image to RGB first. Your image is gray-scaled and has only 1 channel. Its shape is (128,128,1). Them apply sth like opencv: backtorgb = cv2.cvtColor(gray,cv2.COLOR_GRAY2RGB) to every image in your data and everything will be ok
Your mask is RGB and has 3 channels. but your image is grayscale and has one channel. See This question for converting RGB image to grayscale image

How to create a bi-input TPU model for images?

I want to convert my GPU model to TPU model. My GPU model takes two input image and has the same output for both images. I use custom data generator for this. There are two parallel networks; one for each input.
From this StackOverflow question, I tried to solve this but I failed.
Here is what I tried
dataset_12 = tf.data.Dataset.from_tensor_slices((left_train_paths, right_train_paths))
dataset_label = tf.data.Dataset.from_tensor_slices(train_labels)
dataset = tf.data.Dataset.zip((dataset_12, dataset_label)).batch(2).repeat()
Problem I am facing is that I am unable to decode the bi-input images.
Here is the decoder function
def decode_image(filename, label=None, image_size=(IMG_SIZE_h, IMG_SIZE_w)):
bits = tf.io.read_file(filename)
image = tf.image.decode_jpeg(bits, channels=3)
image = tf.cast(image, tf.float32) / 255.0
image = tf.image.resize(image, image_size)
#convert to numpy and do some cv2 staff mb?
if label is None:
return image
else:
return image, label
The issue is that I am unable to pass both images to the decoder function at the same time. How can I resolve this?
I also try to decode the image in following way
def decode(img,image_size=(IMG_SIZE_h, IMG_SIZE_w)):
bits = tf.io.read_file(img)
image = tf.image.decode_jpeg(bits, channels=3)
image = tf.cast(image, tf.float32) / 255.0
image = tf.image.resize(image, image_size)
return image
def decode_image(left, right,labels=None ):
if labels is None:
return decode(left),decode(right)
else:
return decode(left),decode(right),labels
image=tf.data.Dataset.from_tensor_slices((left_train_paths,right_train_paths,train_labels ))
dataset=image.map(decode_image, num_parallel_calls=AUTO).repeat().shuffle(512).batch(BATCH_SIZE).prefetch(AUTO)
dataset
The output is of dataset variable is now as
<PrefetchDataset shapes: ((None, 760, 760, 3), (None, 760, 760, 3), (None, 8)), types: (tf.float32, tf.float32, tf.int64)>
How can I pass it to the model now?
Model
def get_model():
left_tensor = Input(shape=(IMG_SIZE_h,IMG_SIZE_w,3))
right_tensor = Input(shape=(IMG_SIZE_h,IMG_SIZE_w,3))
left_model = EfficientNetB3(input_shape = (img_shape,img_shape,3), include_top = False, weights = 'imagenet',input_tensor=left_tensor)
right_model = EfficientNetB3(input_shape = (img_shape,img_shape,3), include_top = False, weights = 'imagenet',input_tensor=right_tensor)
con = concatenate([left_model.output, right_model.output])
GAP= GlobalAveragePooling2D()(con)
out = Dense(8, activation = 'sigmoid')(GAP)
model =Model(inputs=[left_input, right_input], outputs=out)
return model
I found a pretty elegant solution. I will explain step by step since may be a bit different of what you thought:
When decoding the images stack both images in a single tensor so the input tensor will be of shape [2, IMAGE_H, IMAGE_W, 3]
def decode_single(im_path, image_size):
bits = tf.io.read_file(im_path)
image = tf.image.decode_jpeg(bits, channels=3)
image = tf.cast(image, tf.float32) / 255.0
image = tf.image.resize(image, image_size)
return image
# Note that the image paths are packed in a tuple, and we unpack them inside the function
def decode(paths, label=None, image_size=(128, 128)):
image_path1, image_path2 = paths
im1 = decode_single(image_path1, image_size)
im2 = decode_single(image_path2, image_size)
images = tf.stack([im1, im2])
if label is not None:
return images, label
return images
I declare the data pipeline so the paths are packed in a tuple.
label_ds = ...
ds = tf.data.Dataset.from_tensor_slices((left_paths, right_paths))
ds = tf.data.Dataset.zip((ds, label_ds)) # returns as ((im_path1, im_path2), label)) not (im_path1, im_path2, label)
ds = ds.map(decode).batch(4)
print(ds)
# Out: <BatchDataset shapes: ((None, 2, 128, 128, 3), ((None,),)), types: (tf.float32, (tf.int32,))>
Since we are feeding batches of two images (None, 2, 128, 128, 3). Declare the model with a single input of shape (2, HEIGHT, WIDTH, 3) and then we split the input in the two images:
def get_model():
input_layer = Input(shape=(2, IMAGE_H,IMAGE_W,3))
# Split into two images
right_image, left_image = Lambda(lambda x: tf.split(x, 2, axis=1))(input_layer)
right_image = Reshape([IMAGE_H, IMAGE_W, 3])(right_image)
left_image = Reshape([IMAGE_H, IMAGE_W, 3])(left_image)
# Replace by EfficientNets
left_model = Conv2D(64, 3)(left_image)
right_model = Conv2D(64, 3)(right_image)
con = Concatenate(-1)([left_model, right_model])
GAP = GlobalAveragePooling2D()(con)
out = Dense(8, activation = 'sigmoid')(GAP)
model = tf.keras.Model(inputs=input_layer, outputs=out)
return model
Finally compile and train the model as usual:
model = get_model()
model.compile(...)
model.fit(ds, epochs=10)

Dataset input from bmp images only 50% accurate

I've created this graph to try:
Import BMP files and generate label based on their filename (L/R).
Train a network to determine between the left and right eye.
Evaluate the network.
I'm using the new framework and get it all in as a dataset. The code runs, but I only get 50% accuracy (no learning happening).
Can anyone check that the graph is right and it's just my network I need to fix ?
""" Routine for processing Eye Image dataset
determines left/right eye
Using Tensorflow API v1.3
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import fnmatch
import tensorflow as tf
from six.moves import xrange # pylint: disable=redefined-builtin
import nnLayers as nnLayer
IMAGE_SIZE = 460
SCALE_SIZE = 100
NUM_CLASSES = 2
IMAGE_DEPTH = 3
FLAGS = tf.app.flags.FLAGS
# Basic model parameters.
tf.app.flags.DEFINE_integer('batch_size', 200,
"""Number of images to process in a batch.""")
tf.app.flags.DEFINE_integer('num_epochs', 1001,
"""Number of images to process in a batch.""")
tf.app.flags.DEFINE_string('train_directory', './eyeImages',
"""directory of images to process.""")
tf.app.flags.DEFINE_string('test_directory', './eyeTest',
"""directory of images to process.""")
tf.app.flags.DEFINE_string('log_dir', './logs',
"""logging directory""")
def _parse_function(filename, label):
"""Takes filenames and labels and returns
one hot labels and image values"""
#read the file
image_string = tf.read_file(filename)
#decode BMP file
image_decoded = tf.image.decode_bmp(image_string)
#resize accordingly
image = tf.image.resize_images(image_decoded, [SCALE_SIZE, SCALE_SIZE])
#convert label to one hot
one_hot = tf.one_hot(label, NUM_CLASSES)
return image, one_hot
def inference(image):
#shape image for convolution
with tf.name_scope('input_reshape'):
x_image = tf.reshape(image, [-1, SCALE_SIZE, SCALE_SIZE, IMAGE_DEPTH]) #infer number of images, last dimension is features
tf.summary.image('input_images',x_image)
#neural net layers
#100x100x3 -> 50x50x32
h_pool1 = nnLayer.conv_layer(x_image, IMAGE_DEPTH, 5, 32, 'hiddenLayer1', act=tf.nn.relu)
#50x50x32 -> 25x25x64
h_pool2 = nnLayer.conv_layer(h_pool1, 32, 5, 64, 'hiddenLayer2', act=tf.nn.relu)
#25x25x64 -> 1024x2
h_fc1 = nnLayer.fc_layer(h_pool2, 64, 25, 1024, 'fcLayer1', act=tf.nn.relu)
#1024x2 ->1x2
with tf.name_scope('final-layer'):
with tf.name_scope('weights'):
W_fc2 = nnLayer.weight_variable([1024,NUM_CLASSES])
with tf.name_scope('biases'):
b_fc2 = nnLayer.bias_variable([NUM_CLASSES])
y_conv = tf.matmul(h_fc1, W_fc2) + b_fc2
return y_conv
def folderParser(folder):
"""output BMP file names in directory and
label based on file name"""
#create list of filenames in directory
files = os.listdir(folder)
#filter for BMP files
bmpfiles = fnmatch.filter(files, '*.bmp')
#create empty lists
labels = []
fullNames = []
#get the length of the filename and determine left/right label
for i in range(len(bmpfiles)):
length = len(bmpfiles[i])
fullNames.append(folder + '/' + bmpfiles[i])
if (bmpfiles[i][length-17])=='L':
labels.append(1)
else:
labels.append(0)
return fullNames,labels
def main(argv=None): # pylint: disable=unused-argument
#delete the log files if present
#if tf.gfile.Exists(FLAGS.log_dir):
# tf.gfile.DeleteRecursively(FLAGS.log_dir)
#tf.gfile.MakeDirs(FLAGS.log_dir)
#get file names and labels
trainNames, trainLabels = folderParser(FLAGS.train_directory)
testNames, testLabels = folderParser(FLAGS.test_directory)
# create a dataset of the file names and labels
tr_data = tf.contrib.data.Dataset.from_tensor_slices((trainNames, trainLabels))
ts_data = tf.contrib.data.Dataset.from_tensor_slices((testNames, testLabels))
#map the data set from file names to images
tr_data = tr_data.map(_parse_function)
ts_data = ts_data.map(_parse_function)
#shuffle the images
tr_data = tr_data.shuffle(FLAGS.batch_size*2)
ts_data = ts_data.shuffle(FLAGS.batch_size*2)
#create batches
tr_data = tr_data.batch(FLAGS.batch_size)
ts_data = ts_data.batch(FLAGS.batch_size)
#create handle for datasets
handle = tf.placeholder(tf.string, shape=[])
iterator = tf.contrib.data.Iterator.from_string_handle(handle, tr_data.output_types, tr_data.output_shapes)
next_element = iterator.get_next()
#setup iterator
training_iterator = tr_data.make_initializable_iterator()
validation_iterator = ts_data.make_initializable_iterator()
#retrieve next batch
features, labels = iterator.get_next()
#run network
y_conv = inference(features)
#determine softmax and loss function
with tf.variable_scope('softmax_linear') as scope:
diff = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=y_conv)
with tf.name_scope('total'):
cross_entropy = tf.reduce_mean(diff)
tf.summary.scalar('cross_entropy', cross_entropy)
#run gradient descent
with tf.name_scope('train'):
training_op = tf.train.GradientDescentOptimizer(1e-3).minimize(cross_entropy)
#identify correct predictions
with tf.name_scope('correct_prediction'):
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(labels, 1))
#find the accuracy of the model
with tf.name_scope('accuracy'):
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
tf.summary.scalar('accuracy', accuracy)
with tf.Session() as sess:
#initialization of the variables
training_handle = sess.run(training_iterator.string_handle())
validation_handle = sess.run(validation_iterator.string_handle())
sess.run(tf.global_variables_initializer())
#merge all the summaries and write test summaries
merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train', sess.graph)
test_writer = tf.summary.FileWriter(FLAGS.log_dir + '/test')
#run through epochs
for epoch in range(FLAGS.num_epochs):
#initialize the training set for training epoch
sess.run(training_iterator.initializer)
if epoch % 2 ==0:
#initialize validation set
sess.run(validation_iterator.initializer)
#test
summary, acc = sess.run([merged, accuracy], feed_dict={handle: validation_handle})
train_writer.add_summary(summary, epoch) #write to test file
print('step %s, accuracy %s' % (epoch, acc))
else:
#train
sess.run(training_op, feed_dict={handle: training_handle})
#close the log files
train_writer.close()
test_writer.close()
if __name__ == '__main__':
tf.app.run()
Aaron
The answer was image standardization:
image_std = tf.image.per_image_standardization (image_resized)
Without the image standardization the neurons were becoming saturated. Improved the outcome straight away.
Thanks.