Super Resolution on Coral TPU - tensorflow

I'm trying to run a super resolution model on Coral TPU. After integer only quantization, noticed a strong color degradation or gray overcast over SR images, even though they do look sharper than bicubic upscaled ones. Can anyone give me some tips on how to improve it? Code example pasted below. All images are from a super resolution training dataset. An example is linked here.
Top: SR image, Bot: Bicubic upscaled image
import tensorflow as tf
import tensorflow_hub as hub
import matplotlib.pyplot as plt
print(tf.__version__)
# set up representative dataset for quantization
rep_path = r'C:\Users\xxx\Downloads\BSR_bsds500\BSR\BSDS500\data\images'
rep_ds = tf.keras.utils.image_dataset_from_directory(
rep_path,
seed=123,
image_size=(256, 256),
batch_size=1)
def representative_data_gen():
for image_batch, _ in rep_ds:
yield [image_batch]
# load tf SR model and convert to tf lite with quantization.
model = hub.load("https://tfhub.dev/captain-pool/esrgan-tf2/1")
concrete_func = model.signatures[tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
#tf.function(input_signature=[tf.TensorSpec(shape=[1, 256, 256, 3], dtype=tf.float32)])
def f(input):
return concrete_func(input);
converter = tf.lite.TFLiteConverter.from_concrete_functions([f.get_concrete_function()], model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8
tflite_model_quant = converter.convert()
with tf.io.gfile.GFile('ESRGAN_quant.tflite', 'wb') as f:
f.write(tflite_model_quant)
# run inference for a test image
test_img_path = r'C:\Users\xxx\Downloads\24004.jpg'
lr = tf.io.read_file(test_img_path)
lr = tf.image.decode_jpeg(lr)
lr = tf.expand_dims(lr, axis=0)
lr = tf.cast(lr, tf.uint8)
# Load TFLite model and allocate tensors.
interpreter = tf.lite.Interpreter(model_path='.\ESRGAN_quant.tflite')
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
interpreter.set_tensor(input_details[0]['index'], lr)
interpreter.invoke()
output_data = interpreter.get_tensor(output_details[0]['index'])
sr = tf.squeeze(output_data, axis=0)
sr = tf.clip_by_value(sr, 0, 255)
sr = tf.round(sr)
sr = tf.cast(sr, tf.uint8)
plt.figure(figsize=(30, 30))
plt.subplot(2, 1, 1)
plt.title(f'ESRGAN (x4)')
plt.imshow(sr.numpy());
bicubic = tf.image.resize(lr, [1024, 1024], tf.image.ResizeMethod.BICUBIC)
bicubic = tf.cast(bicubic, tf.uint8)
plt.subplot(2, 1, 2)
plt.title('Bicubic')
plt.imshow(bicubic.numpy()[0]);

Related

TFGPT2LMHeadModel to TFLite changes the input and output shape

The TFGPT2LMHeadModel convertion to TFlite renders unexpected input and output shape
as oppoed to the pre trained model gpt2-64.tflite , how can we fix the same ?
!wget https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-64.tflite
import numpy as np
import tensorflow as tf
tflite_model_path = 'gpt2-64.tflite'
# Load the TFLite model and allocate tensors
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
interpreter.allocate_tensors()
# Get input and output tensors
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
input_shape = input_details[0]['shape']
#print the output
input_data = np.array(np.random.random_sample((input_shape)), dtype=np.int32)
interpreter.set_tensor(input_details[0]['index'], input_data)
interpreter.invoke()
output_data = interpreter.get_tensor(output_details[0]['index'])
print(output_data.shape)
print(input_shape)
Gives output as
>(1, 64, 50257)
> [ 1 64]
which is as expected
but when we try to convert TFGPT2LMHeadModel to TFLITE , we get different output as below
import tensorflow as tf
from transformers import TFGPT2LMHeadModel
import numpy as np
model = TFGPT2LMHeadModel.from_pretrained('gpt2') # or 'distilgpt2'
input_spec = tf.TensorSpec([1, 64], tf.int32)
model._set_inputs(input_spec, training=False)
converter = tf.lite.TFLiteConverter.from_keras_model(model)
# For FP16 quantization:
# converter.optimizations = [tf.lite.Optimize.DEFAULT]
# converter.target_spec.supported_types = [tf.float16]
tflite_model = converter.convert()
open("gpt2-64-2.tflite", "wb").write(tflite_model)
tflite_model_path = 'gpt2-64-2.tflite'
# Load the TFLite model and allocate tensors
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
interpreter.allocate_tensors()
# Get input and output tensors
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
input_shape = input_details[0]['shape']
#print the output
input_data = np.array(np.random.random_sample((input_shape)), dtype=np.int32)
interpreter.set_tensor(input_details[0]['index'], input_data)
interpreter.invoke()
output_data = interpreter.get_tensor(output_details[0]['index'])
print(output_data.shape)
print(input_shape)
Output:
>(2, 1, 12, 1, 64)
>[1 1]
How can we fix the same ?

slow training despite using tf data pipeline

I am training image classification model which contains 21000 image. I have created data pipeline with the help of tf.data API of tensorflow. My issue is that training is too slow despite using API. I have also enabled tensorflow gpu version. Please help me out.I thought first it was due to keras imagedatagenerator which is slowing down my training time but now when I changed it tf.data pipeline it still does not utilizes my gpu. Below is my whole code
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import models, layers
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications import ResNet50, EfficientNetB3, InceptionV3, DenseNet121
from tensorflow.keras.optimizers import Adam
# ignoring warnings
import warnings
warnings.simplefilter("ignore")
import os,cv2
base_dir = "D:/cassava-leaf-disease-classification/"
train_csv = pd.read_csv("D:/cassava-leaf-disease-classification/train.csv")
# print(train_csv.head())
df_sample = pd.read_csv("D:/cassava-leaf-disease-classification/sample_submission.csv")
train_images = "D:/cassava-leaf-disease-classification/train_images/"+train_csv['image_id']
# print(train_images)
# print(os.listdir(train_images))
train_labels = pd.read_csv(os.path.join(base_dir, "train.csv"))
# print(train_labels)
BATCH_SIZE = 16
EPOCHS = 25
STEPS_PER_EPOCH = len(train_labels)*0.8 / BATCH_SIZE
TARGET_SIZE = 300
# train_labels['label'] = train_labels.label.astype('str')
labels = train_labels.iloc[:,-1].values
# print(labels)
def build_decoder(with_labels=True, target_size=(TARGET_SIZE, TARGET_SIZE), ext='jpg'):
def img_decode(img_path):
file_bytes = tf.io.read_file(img_path)
if ext == 'png':
img = tf.image.decode_png(file_bytes, channels=3)
elif ext in ['jpg', 'jpeg']:
img = tf.image.decode_jpeg(file_bytes, channels=3)
else:
raise ValueError("Image extension not supported")
img = tf.cast(img, tf.float32) / 255.0
img = tf.image.resize(img, target_size)
return img
def decode_with_labels(img_path, label):
return img_decode(img_path), label
if with_labels == True:
return decode_with_labels
else:
return img_decode
def build_augmenter(with_labels=True):
def augment(img):
img = tf.image.random_flip_left_right(img)
img = tf.image.random_flip_up_down(img)
img = tf.image.random_brightness(img, 0.1)
img = tf.image.random_contrast(img, 0.9, 1.1)
img = tf.image.random_saturation(img, 0.9, 1.1)
return img
def augment_with_labels(img, label):
return augment(img), label
if with_labels == True:
return augment_with_labels
else:
return augment
def build_dataset(paths, labels=None, bsize=32, cache=True,
decode_fn=None, augment_fn=None,
augment=True, repeat=True, shuffle=1024,
cache_dir=""):
if cache_dir != "" and cache is True:
os.makedirs(cache_dir, exist_ok=True)
if decode_fn is None:
decode_fn = build_decoder(labels is not None)
if augment_fn is None:
augment_fn = build_augmenter(labels is not None)
AUTO = tf.data.experimental.AUTOTUNE
slices = paths if labels is None else (paths, labels)
dset = tf.data.Dataset.from_tensor_slices(slices)
dset = dset.map(decode_fn, num_parallel_calls=AUTO)
# dset = dset.cache(cache_dir) if cache else dset
dset = dset.map(augment_fn, num_parallel_calls=AUTO) if augment else dset
dset = dset.repeat() if repeat else dset
dset = dset.shuffle(shuffle) if shuffle else dset
dset = dset.batch(bsize).prefetch(AUTO)
return dset
# Train test split
(train_img, valid_img,train_labels,valid_labels) = train_test_split(train_images,labels,train_size = 0.8,random_state = 0)
# print(train, valid)
# Tensorflow datasets
train_df = build_dataset(
train_img, train_labels, bsize=BATCH_SIZE,
cache=True)
valid_df = build_dataset(
valid_img, valid_labels, bsize=BATCH_SIZE,
repeat=False, shuffle=False, augment=False,
cache=True)
def create_model():
model = models.Sequential()
model.add(EfficientNetB3(include_top=False, weights='imagenet',
input_shape=(TARGET_SIZE,TARGET_SIZE,3)))
model.add(layers.GlobalAveragePooling2D())
model.add(layers.Dense(5,activation='softmax'))
model.compile(optimizer=Adam(lr=0.001),
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
return model
model = create_model()
model.summary()
model_save = ModelCheckpoint('C:/Users/rosha/PycharmProjects/CLDD/saved_Models/EffNetB3_300_16_best_weights.h5',
save_best_only=True,
save_weights_only=True,
monitor='val_accuracy',
mode='max',
verbose=1
)
early_stop = EarlyStopping(monitor='val_accuracy',
min_delta=0.001,
patience=5,
mode='max',
verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_accuracy',
factor=0.3,
patience=2,
min_delta=0.001,
mode='max',
verbose=1)
history = model.fit(
train_df,
validation_data=valid_df,
steps_per_epoch=STEPS_PER_EPOCH,
epochs=EPOCHS,
callbacks=[model_save, early_stop, reduce_lr],
verbose=1,
)
plt.rcParams.update({'font.size': 16})
hist = pd.DataFrame(history.history)
fig, (ax1, ax2) = plt.subplots(figsize=(12, 12), nrows=2, ncols=1)
hist['loss'].plot(ax=ax1, c='k', label='training loss')
hist['val_loss'].plot(ax=ax1, c='r', linestyle='--', label='validation loss')
ax1.legend()
hist['accuracy'].plot(ax=ax2, c='k', label='training accuracy')
hist['val_accuracy'].plot(ax=ax2, c='r', linestyle='--', label='validation accuracy')
ax2.legend()
plt.show()
model.save('./EffNetB3_300_16.h5')
So here is a small checklist I like to go over:
Execute the following code to check whether the GPU is found by tensorflow:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
If the output is "Num GPUs Available: 0", then you should check that you indeed have tensorflow-gpu installed, you might also want to check that support libraries are also in the gpu version.
If your libraries are correct you will need to check to see if your CUDA driver installation is correct. This step is somewhat OS dependent but there are many tutorials online for both. My favourite for TF can be found on the official website: https://www.tensorflow.org/install/gpu

Tensor format issue from converting Pytorch -> Onnx -> Tensorflow

I have an issue with Tensorflow model that is converted from Pytorch -> Onnx -> Tensorflow. The issue is the converted Tensorflow model expects the input in Pytorch format that is (batch size, number channels, height, width) but not in Tensorflow format (batch size, height, width, number channel). Therefore, I cannot use the model to process further with Vitis AI.
So I would like to ask is there is any ways to convert this Pytorch input format to Tensorflow format by using tools from Onnx, Tensorflow 1, or others?
My code is as below:
Pytorch -> Onnx
from hardnet import hardnet
import torch
import onnx
ckpt = torch.load('../hardnet.pth')
model_state_dict = ckpt['model_state_dict']
optimizer_state_dict = ckpt['optimizer_state_dict']
model = hardnet(11)
model.load_state_dict(model_state_dict)
model.eval()
dummy_input = torch.randn(1, 3, 1080, 1920)
input_names = ['input0']
output_names = ['output0']
output_file = 'hardnet.onnx'
torch.onnx.export(model, dummy_input, output_file, verbose=True,
input_names=input_names, output_names=output_names,
opset_version=11, keep_initializers_as_inputs=True)
onnx_model = onnx.load(output_file)
onnx.checker.check_model(onnx_model)
print('Passed Onnx')
Onnx -> Tensorflow 1 (using Tensorflow 1.15)
import cv2
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import onnx
from onnx_tf.backend import prepare
output_file = 'hardnet.onnx'
onnx_model = onnx.load(output_file)
output = prepare(onnx_model)
output.export_graph('hardnet.pb')
tf.compat.v1.disable_eager_execution()
def load_pb(path_to_pb: str):
"""From: https://stackoverflow.com/questions/51278213/what-is-the-use-of-a-pb-file-in-tensorflow-and-how-does-it-work
"""
with tf.gfile.GFile(path_to_pb, "rb") as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
with tf.Graph().as_default() as graph:
tf.import_graph_def(graph_def, name='')
return graph
graph = load_pb('hardnet.pb')
input = graph.get_tensor_by_name('input0:0')
output = graph.get_tensor_by_name('output0:0')
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
img = cv2.imread('train_0.jpg', cv2.IMREAD_COLOR)
img = cv2.resize(img, (1920, 1080))
img = img/255
img = img - mean
img = img/std
img = np.expand_dims(img, -1)
# To Pytorch format.
img = np.transpose(img, (3, 2, 0, 1))
img = img
with tf.Session(graph=graph) as sess:
pred = sess.run(output, {input: img})
You could wrap your Pytorch model into another one that would do the transpose you want to have in TensorFlow. See the following example:
Let's say you have the following toy NN:
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.rnn = nn.LSTM(10, 20, 2)
def forward(self, x):
h0 = torch.zeros(2, 3, 20)
c0 = torch.zeros(2, 3, 20)
return self.rnn(x, (h0, c0))
the exemplary pytorch/tensorflow input shape would be :
>> pytorch_input = torch.randn(5, 3, 10)
>> tf_input = torch.transpose(pytorch_input, 1, 2)
>> print("PyTorch input shape: ", pytorch_input.shape)
>> print("TensorFlow input shape: ", tf_input.shape)
PyTorch input shape: torch.Size([5, 3, 10])
TensorFlow input shape: torch.Size([5, 10, 3])
Now, the wrapper which will first transpose input and then pass transposed input to some model:
class NetTensorFlowWrapper(nn.Module):
def __init__(self, main_module: nn.Module):
super(NetTensorFlowWrapper, self).__init__()
self.main_module = main_module
def forward(self, x):
x = torch.transpose(x, 1, 2)
return self.main_module(x)
Then, this is possible:
net = Net()
net_wrapper = NetTensorFlowWrapper(net)
net(pytorch_input)
net_wrapper(tf_input)
and then, when you finally save your models like you did previously via torch.onnx.export and read their graph via onnx package (not torch.onnx) you will have...
for Net- input 5x3x10 and no transpose layer
graph torch-jit-export (
%input0[FLOAT, 5x3x10]
{
%76 = Shape(%input0)
%77 = Constant[value = <Scalar Tensor []>]()
for NetTensorFlowWrapper- input 5x10x3 and transpose layer
graph torch-jit-export (
%input0[FLOAT, 5x10x3]
{
%9 = Transpose[perm = [0, 2, 1]](%input0)
%77 = Shape(%9)
%78 = Constant[value = <Scalar Tensor []>]()
...

Tensorflow, read tfrecord without a graph

I tried to write a good structured Neural network model with Tensorflow. But I met a problem about feed the data from tfrecord into the graph. The code is as below, it hangs on at the following function, how can I make it work?
images, labels = network.load_tfrecord_data(1)
this function can not get the features (images) and labels from my datafile, .tfrecords?
Any idea will be appreciated?
from __future__ import division
from __future__ import print_function
import datetime
import numpy as np
import tensorflow as tf
layers = tf.contrib.layers
losses = tf.contrib.losses
metrics = tf.contrib.metrics
LABELS = 10
WIDTH = 28
HEIGHT = 28
HIDDEN = 100
def read_and_decode_single_example(filename):
filename_queue = tf.train.string_input_producer([filename], num_epochs=None)
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(
serialized_example,
features={
'label': tf.FixedLenFeature([], tf.int64),
'image': tf.FixedLenFeature([50176], tf.int64)
})
label = features['label']
image = features['image']
image = tf.reshape(image, [-1, 224, 224, 1])
label = tf.one_hot(label - 1, 11, dtype=tf.int64)
return label, image
class Network:
def __init__(self, logdir, experiment, threads):
# Construct the graph
with tf.name_scope("inputs"):
self.images = tf.placeholder(tf.float32, [None, WIDTH, HEIGHT, 1], name="images")
self.labels = tf.placeholder(tf.int64, [None], name="labels")
# self.keep_prob = keep_prob
self.keep_prob = tf.placeholder(tf.float32, name="keep_prob")
flattened_images = layers.flatten(self.images)
hidden_layer = layers.fully_connected(flattened_images, num_outputs=HIDDEN, activation_fn=tf.nn.relu, scope="hidden_layer")
output_layer = layers.fully_connected(hidden_layer, num_outputs=LABELS, activation_fn=None, scope="output_layer")
loss = losses.sparse_softmax_cross_entropy(labels=self.labels, logits=output_layer, scope="loss")
self.training = layers.optimize_loss(loss, None, None, tf.train.AdamOptimizer(), summaries=['loss', 'gradients', 'gradient_norm'], name='training')
with tf.name_scope("accuracy"):
predictions = tf.argmax(output_layer, 1, name="predictions")
accuracy = metrics.accuracy(predictions, self.labels)
tf.summary.scalar("training/accuracy", accuracy)
self.accuracy = metrics.accuracy(predictions, self.labels)
with tf.name_scope("confusion_matrix"):
confusion_matrix = metrics.confusion_matrix(predictions, self.labels, weights=tf.not_equal(predictions, self.labels), dtype=tf.float32)
confusion_image = tf.reshape(confusion_matrix, [1, LABELS, LABELS, 1])
# Summaries
self.summaries = {'training': tf.summary.merge_all() }
for dataset in ["dev", "test"]:
self.summaries[dataset] = tf.summary.scalar(dataset + "/loss", loss)
self.summaries[dataset] = tf.summary.scalar(dataset + "/accuracy", accuracy)
self.summaries[dataset] = tf.summary.image(dataset + "/confusion_matrix", confusion_image)
# Create the session
self.session = tf.Session(config=tf.ConfigProto(inter_op_parallelism_threads=threads,
intra_op_parallelism_threads=threads))
self.session.run(tf.global_variables_initializer())
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
self.summary_writer = tf.summary.FileWriter("{}/{}-{}".format(logdir, timestamp, experiment), graph=self.session.graph, flush_secs=10)
self.steps = 0
def train(self, images, labels, keep_prob):
self.steps += 1
feed_dict = {self.images: self.session.run(images), self.labels: self.session.run(labels), self.keep_prob: keep_prob}
if self.steps == 1:
metadata = tf.RunMetadata()
self.session.run(self.training, feed_dict, options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE), run_metadata=metadata)
self.summary_writer.add_run_metadata(metadata, 'step1')
elif self.steps % 100 == 0:
_, summary = self.session.run([self.training, self.summaries['training']], feed_dict)
self.summary_writer.add_summary(summary, self.steps)
else:
self.session.run(self.training, feed_dict)
def evaluate(self, dataset, images, labels):
feed_dict ={self.images: images, self.labels: labels, self.keep_prob: 1}
summary = self.summaries[dataset].eval({self.images: images, self.labels: labels, self.keep_prob: 1}, self.session)
self.summary_writer.add_summary(summary, self.steps)
def load_tfrecord_data(self, training):
training = training
if training:
label, image = read_and_decode_single_example("mhad_Op_train.tfrecords")
# print(self.session.run(image))
else:
label, image = read_and_decode_single_example("mhad_Op_test.tfrecords")
# image = tf.cast(image, tf.float32) / 255.
images_batch, labels_batch = tf.train.shuffle_batch(
[image, label], batch_size=50, num_threads=2,
capacity=80,
min_after_dequeue=30)
return images_batch, labels_batch
if __name__ == '__main__':
# Fix random seed
np.random.seed(42)
tf.set_random_seed(42)
# Parse arguments
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', default=256, type=int, help='Batch size.')
parser.add_argument('--epochs', default=50, type=int, help='Number of epochs.')
parser.add_argument('--logdir', default="logs", type=str, help='Logdir name.')
parser.add_argument('--exp', default="mnist-final-confusion_matrix_customized_loss", type=str, help='Experiment name.')
parser.add_argument('--threads', default=1, type=int, help='Maximum number of threads to use.')
args = parser.parse_args()
# Load the data
keep_prob = 1
# Construct the network
network = Network(logdir=args.logdir, experiment=args.exp, threads=args.threads)
# Train
for i in range(args.epochs):
images, labels = network.load_tfrecord_data(1)
network.train(images, labels, keep_prob)
print('current epoch', i)
You need to start the queue before using images, labels in your model.
with tf.Session() as sess:
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
images, labels = network.load_tfrecord_data(1)
...
coord.request_stop()
coord.join(threads)
Check this tutorial for a full example

Dataset input from bmp images only 50% accurate

I've created this graph to try:
Import BMP files and generate label based on their filename (L/R).
Train a network to determine between the left and right eye.
Evaluate the network.
I'm using the new framework and get it all in as a dataset. The code runs, but I only get 50% accuracy (no learning happening).
Can anyone check that the graph is right and it's just my network I need to fix ?
""" Routine for processing Eye Image dataset
determines left/right eye
Using Tensorflow API v1.3
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import fnmatch
import tensorflow as tf
from six.moves import xrange # pylint: disable=redefined-builtin
import nnLayers as nnLayer
IMAGE_SIZE = 460
SCALE_SIZE = 100
NUM_CLASSES = 2
IMAGE_DEPTH = 3
FLAGS = tf.app.flags.FLAGS
# Basic model parameters.
tf.app.flags.DEFINE_integer('batch_size', 200,
"""Number of images to process in a batch.""")
tf.app.flags.DEFINE_integer('num_epochs', 1001,
"""Number of images to process in a batch.""")
tf.app.flags.DEFINE_string('train_directory', './eyeImages',
"""directory of images to process.""")
tf.app.flags.DEFINE_string('test_directory', './eyeTest',
"""directory of images to process.""")
tf.app.flags.DEFINE_string('log_dir', './logs',
"""logging directory""")
def _parse_function(filename, label):
"""Takes filenames and labels and returns
one hot labels and image values"""
#read the file
image_string = tf.read_file(filename)
#decode BMP file
image_decoded = tf.image.decode_bmp(image_string)
#resize accordingly
image = tf.image.resize_images(image_decoded, [SCALE_SIZE, SCALE_SIZE])
#convert label to one hot
one_hot = tf.one_hot(label, NUM_CLASSES)
return image, one_hot
def inference(image):
#shape image for convolution
with tf.name_scope('input_reshape'):
x_image = tf.reshape(image, [-1, SCALE_SIZE, SCALE_SIZE, IMAGE_DEPTH]) #infer number of images, last dimension is features
tf.summary.image('input_images',x_image)
#neural net layers
#100x100x3 -> 50x50x32
h_pool1 = nnLayer.conv_layer(x_image, IMAGE_DEPTH, 5, 32, 'hiddenLayer1', act=tf.nn.relu)
#50x50x32 -> 25x25x64
h_pool2 = nnLayer.conv_layer(h_pool1, 32, 5, 64, 'hiddenLayer2', act=tf.nn.relu)
#25x25x64 -> 1024x2
h_fc1 = nnLayer.fc_layer(h_pool2, 64, 25, 1024, 'fcLayer1', act=tf.nn.relu)
#1024x2 ->1x2
with tf.name_scope('final-layer'):
with tf.name_scope('weights'):
W_fc2 = nnLayer.weight_variable([1024,NUM_CLASSES])
with tf.name_scope('biases'):
b_fc2 = nnLayer.bias_variable([NUM_CLASSES])
y_conv = tf.matmul(h_fc1, W_fc2) + b_fc2
return y_conv
def folderParser(folder):
"""output BMP file names in directory and
label based on file name"""
#create list of filenames in directory
files = os.listdir(folder)
#filter for BMP files
bmpfiles = fnmatch.filter(files, '*.bmp')
#create empty lists
labels = []
fullNames = []
#get the length of the filename and determine left/right label
for i in range(len(bmpfiles)):
length = len(bmpfiles[i])
fullNames.append(folder + '/' + bmpfiles[i])
if (bmpfiles[i][length-17])=='L':
labels.append(1)
else:
labels.append(0)
return fullNames,labels
def main(argv=None): # pylint: disable=unused-argument
#delete the log files if present
#if tf.gfile.Exists(FLAGS.log_dir):
# tf.gfile.DeleteRecursively(FLAGS.log_dir)
#tf.gfile.MakeDirs(FLAGS.log_dir)
#get file names and labels
trainNames, trainLabels = folderParser(FLAGS.train_directory)
testNames, testLabels = folderParser(FLAGS.test_directory)
# create a dataset of the file names and labels
tr_data = tf.contrib.data.Dataset.from_tensor_slices((trainNames, trainLabels))
ts_data = tf.contrib.data.Dataset.from_tensor_slices((testNames, testLabels))
#map the data set from file names to images
tr_data = tr_data.map(_parse_function)
ts_data = ts_data.map(_parse_function)
#shuffle the images
tr_data = tr_data.shuffle(FLAGS.batch_size*2)
ts_data = ts_data.shuffle(FLAGS.batch_size*2)
#create batches
tr_data = tr_data.batch(FLAGS.batch_size)
ts_data = ts_data.batch(FLAGS.batch_size)
#create handle for datasets
handle = tf.placeholder(tf.string, shape=[])
iterator = tf.contrib.data.Iterator.from_string_handle(handle, tr_data.output_types, tr_data.output_shapes)
next_element = iterator.get_next()
#setup iterator
training_iterator = tr_data.make_initializable_iterator()
validation_iterator = ts_data.make_initializable_iterator()
#retrieve next batch
features, labels = iterator.get_next()
#run network
y_conv = inference(features)
#determine softmax and loss function
with tf.variable_scope('softmax_linear') as scope:
diff = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=y_conv)
with tf.name_scope('total'):
cross_entropy = tf.reduce_mean(diff)
tf.summary.scalar('cross_entropy', cross_entropy)
#run gradient descent
with tf.name_scope('train'):
training_op = tf.train.GradientDescentOptimizer(1e-3).minimize(cross_entropy)
#identify correct predictions
with tf.name_scope('correct_prediction'):
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(labels, 1))
#find the accuracy of the model
with tf.name_scope('accuracy'):
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
tf.summary.scalar('accuracy', accuracy)
with tf.Session() as sess:
#initialization of the variables
training_handle = sess.run(training_iterator.string_handle())
validation_handle = sess.run(validation_iterator.string_handle())
sess.run(tf.global_variables_initializer())
#merge all the summaries and write test summaries
merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train', sess.graph)
test_writer = tf.summary.FileWriter(FLAGS.log_dir + '/test')
#run through epochs
for epoch in range(FLAGS.num_epochs):
#initialize the training set for training epoch
sess.run(training_iterator.initializer)
if epoch % 2 ==0:
#initialize validation set
sess.run(validation_iterator.initializer)
#test
summary, acc = sess.run([merged, accuracy], feed_dict={handle: validation_handle})
train_writer.add_summary(summary, epoch) #write to test file
print('step %s, accuracy %s' % (epoch, acc))
else:
#train
sess.run(training_op, feed_dict={handle: training_handle})
#close the log files
train_writer.close()
test_writer.close()
if __name__ == '__main__':
tf.app.run()
Aaron
The answer was image standardization:
image_std = tf.image.per_image_standardization (image_resized)
Without the image standardization the neurons were becoming saturated. Improved the outcome straight away.
Thanks.