How to freeze a TensorFlow ckpt trained with a QueueRunner?

I used tf.train.shuffle_batch while training and did not use any placeholder, so when I froze the ckpt to a pb file I did not get any input tensor. I can feed the shuffle_batch tensor, but then the feed has to have the same size as the shuffle_batch data. How can I fix this? I know I could rewrite the net, restore the params and then freeze, but that seems unwise.
The training code:
import tensorflow as tf
import tensorlayer as tl
from tensorlayer.layers import *
import os
trainsize=35680
testsize=889
batch_size=32
inputW=224
inputH=480
TRAIN_TFRECORD='./train.tfrecords'
TEST_TFRECORD='./test.tfrecords'
BATCH_CAPACITY=512
MIN_AFTER_DEQU=256
MAX_Cycle=100000
TRAIN_CYCLE=int(trainsize/batch_size)
TEST_CYCLE=int(testsize/batch_size)
learning_rt = 0.001
savepath='./ckpt/'
logpath='./logs/'
def network(inputs, is_train, reuse):
    BITW = 8
    BITA = 8
    Decay = 0.99
    Epsi = 1e-5
    with tf.variable_scope('Model', reuse=reuse):
        net = InputLayer(inputs, name='input')  # 224*480
        net = QuanConv2dWithBN(net, 32, (3, 3), (1, 1), 'SAME', tf.nn.relu, decay=Decay, epsilon=Epsi, is_train=is_train, bitW=BITW, bitA=BITA, name='Conv1_1')
        net = QuanConv2dWithBN(net, 64, (3, 3), (2, 2), 'SAME', tf.nn.relu, decay=Decay, epsilon=Epsi, is_train=is_train, bitW=BITW, bitA=BITA, name='Conv1_2')  # 112*240
        net = QuanConv2dWithBN(net, 64, (3, 3), (1, 1), 'SAME', tf.nn.relu, decay=Decay, epsilon=Epsi, is_train=is_train, bitW=BITW, bitA=BITA, name='Conv2_1')
        net = QuanConv2dWithBN(net, 128, (3, 3), (2, 2), 'SAME', tf.nn.relu, decay=Decay, epsilon=Epsi, is_train=is_train, bitW=BITW, bitA=BITA, name='Conv2_2')  # 56*120
        net = QuanConv2dWithBN(net, 128, (3, 3), (1, 1), 'SAME', tf.nn.relu, decay=Decay, epsilon=Epsi, is_train=is_train, bitW=BITW, bitA=BITA, name='Conv3_1')
        net = QuanConv2dWithBN(net, 64, (1, 1), (1, 1), 'SAME', tf.nn.relu, decay=Decay, epsilon=Epsi, is_train=is_train, bitW=BITW, bitA=BITA, name='Conv3_2')
        net = QuanConv2dWithBN(net, 128, (3, 3), (2, 2), 'SAME', tf.nn.relu, decay=Decay, epsilon=Epsi, is_train=is_train, bitW=BITW, bitA=BITA, name='Conv3_3')  # 28*60
        net = QuanConv2dWithBN(net, 64, (3, 3), (1, 1), 'SAME', tf.nn.relu, decay=Decay, epsilon=Epsi, is_train=is_train, bitW=BITW, bitA=BITA, name='Conv4_1')
        net = QuanConv2dWithBN(net, 96, (3, 3), (2, 2), 'VALID', tf.nn.relu, decay=Decay, epsilon=Epsi, is_train=is_train, bitW=BITW, bitA=BITA, name='Conv4_2')  # 14*30
        print(net.outputs)
        net = QuanConv2dWithBN(net, 128, (3, 3), (2, 2), 'SAME', tf.nn.relu, decay=Decay, epsilon=Epsi, is_train=is_train, bitW=BITW, bitA=BITA, name='Conv5_1')  # 7*30
        net = QuanConv2dWithBN(net, 128, (3, 3), (1, 2), 'VALID', tf.nn.relu, decay=Decay, epsilon=Epsi, is_train=is_train, bitW=BITW, bitA=BITA, name='Conv5_2')  # 3*30
        net = QuanConv2d(net, 128, (3, 3), (1, 2), 'VALID', tf.nn.leaky_relu, bitW=BITW, bitA=BITA, name='Conv5_3')  # 1*30
        print(net.outputs)
        net = FlattenLayer(net, name='flat1')
        net = QuanDenseLayer(net, 128, act=tf.nn.leaky_relu, bitW=BITW, bitA=BITA, name='dense1')
        net = DropoutLayer(net, 0.5, is_fix=True, is_train=is_train, name='drop1')
        net = DenseLayer(net, 1, name='dense2')
        outnet = net
        volcume = net.outputs
        print(volcume)
    return outnet, net.outputs, volcume
def inference(inputs, is_train, reuse):
    return network(inputs, is_train, reuse)
def read_and_decode(filename):
    filename_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(serialized_example,
                                       features={
                                           'img': tf.FixedLenFeature([], tf.string),
                                           'num': tf.FixedLenFeature([], tf.float32),
                                       })
    img = tf.decode_raw(features['img'], tf.uint8)
    img = tf.reshape(img, [480, 240, 3])
    img = tf.random_crop(img, [480, 224, 3])
    img = tf.image.random_brightness(img, max_delta=0.3)
    img = tf.image.random_contrast(img, lower=0.1, upper=0.5)
    # img = tf.image.random_hue(img, max_delta=0.1)
    # img = tf.image.random_saturation(img, lower=0, upper=2.5)
    img = tf.image.per_image_standardization(img)
    label = tf.reshape(tf.cast(features['num'], tf.float32) * (1. / 230.) - 0.5, [1])
    return img, label
def read_and_decode_test(filename):
    filename_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(serialized_example,
                                       features={
                                           'img': tf.FixedLenFeature([], tf.string),
                                           'num': tf.FixedLenFeature([], tf.float32),
                                       })
    img = tf.decode_raw(features['img'], tf.uint8)
    img = tf.reshape(img, [480, 240, 3])
    img = img[:, 8:232, :]
    img = tf.image.per_image_standardization(img)
    label = tf.reshape(tf.cast(features['num'], tf.float32) * (1. / 230.) - 0.5, [1])
    return img, label
def smooth_L1(x):
    return tf.where(tf.less_equal(tf.abs(x), 1.0), tf.multiply(0.5, tf.pow(x, 2.0)), tf.subtract(tf.abs(x), 0.5))
def cal_loss(logits, labels):
    # return tf.clip_by_value(tf.reduce_mean(tf.losses.mean_squared_error(labels,logits)), 0.000001, 10000000.)
    return tf.reduce_mean(tf.where(tf.less_equal(tf.abs(logits - labels), 0.02), 0.00001 * tf.ones_like(logits - labels), tf.multiply(1., tf.pow(logits - labels, 2.0))))
    # return tf.clip_by_value(tf.reduce_sum(smooth_L1(labels-logits)), 0.0000001, 100.)
def cal_acc(logits, labels):
    return tf.reduce_mean(tf.cast(tf.less_equal(tf.abs(labels - logits), tf.ones_like(labels) * .1), tf.float32))
if __name__ == '__main__':
    img_train, num_train = read_and_decode(TRAIN_TFRECORD)
    img_test, num_test = read_and_decode(TEST_TFRECORD)
    img_train_batch, num_train_batch = tf.train.shuffle_batch(
        [img_train, num_train], batch_size=batch_size, capacity=BATCH_CAPACITY,
        min_after_dequeue=MIN_AFTER_DEQU)
    img_test_batch, num_test_batch = tf.train.batch(
        [img_test, num_test], batch_size=batch_size)
    net, _, logits_train = inference(img_train_batch, True, None)
    _, _, logits_test = inference(img_test_batch, False, True)
    loss_train = cal_loss(logits_train, num_train_batch)
    loss_test = cal_loss(logits_test, num_test_batch)
    acc_test = cal_acc(logits_test, num_test_batch)
    acc_train = cal_acc(logits_train, num_train_batch)
    global_step = tf.train.create_global_step()
    # tf.train.get_global_step()
    learning_rate = tf.train.exponential_decay(learning_rt, global_step,
                                               5000, 0.9, staircase=True)
    train = tf.train.MomentumOptimizer(learning_rate, momentum=0.9).minimize(loss_train, global_step=global_step)
    # train = tf.train.AdamOptimizer(learning_rt).minimize(loss_train)
    tf.summary.scalar('loss_train', loss_train)
    tf.summary.scalar('acc_train', acc_train)
    merged = tf.summary.merge_all()
    with tf.Session(config=tf.ConfigProto()) as sess:
        trainwrite = tf.summary.FileWriter(logpath, sess.graph)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        run_cycle = 0
        if os.path.exists(savepath + '313.ckpt.index'):
            print('\nStart Restore')
            saver.restore(sess, savepath + '313.ckpt')  # restore the same checkpoint that was checked for above
            print('\nEnd Restore')
        print('\nStart Training')
        try:
            while not coord.should_stop():
                while run_cycle < MAX_Cycle:
                    run_cycle += 1
                    # if run_cycle % 10 == 0:
                    #     learning_rt *= 0.6
                    # if run_cycle % 200 == 0:
                    #     learning_rt *= 2.
                    l_tall = 0
                    a_tall = 0
                    l_teall = 0
                    a_teall = 0
                    for train_c in range(TRAIN_CYCLE):
                        _, l_train, a_train = sess.run([train, loss_train, acc_train])
                        l_tall += l_train
                        a_tall += a_train
                        if (train_c + 1) % 100 == 0:
                            print('train_loss:%f' % (l_tall / 100.))
                            print('train_acc:%f' % (a_tall / 100.))
                            l_tall = 0
                            a_tall = 0
                        if (train_c + 1) % 500 == 0:
                            print('Global Step:', sess.run(global_step))
                            result_merged = sess.run(merged)
                            trainwrite.add_summary(result_merged, run_cycle * TRAIN_CYCLE + train_c)
                    for test_c in range(TEST_CYCLE):
                        l_test, a_test = sess.run([loss_test, acc_test])
                        l_teall += l_test
                        a_teall += a_test
                        if (test_c + 1) % TEST_CYCLE == 0:
                            print('------------------')
                            print('test_loss:%f' % (l_teall / TEST_CYCLE))
                            print('test_acc:%f' % (a_teall / TEST_CYCLE))
                            print('------------------')
                            l_teall = 0
                            a_teall = 0  # was `l_teall = 0` twice; the accuracy accumulator needs resetting too
                    saver.save(sess, savepath + str(run_cycle) + '.ckpt')
        except tf.errors.OutOfRangeError:
            print('Done training!!!')
        finally:
            # When done, ask the threads to stop.
            coord.request_stop()
            coord.join(threads)
        sess.close()
The freeze code:
import os, argparse
import tensorflow as tf
from tensorflow.python.framework import graph_util
dir = os.path.dirname(os.path.realpath(__file__))
def freeze_graph(model_folder, output_nodes='y_hat',
                 output_filename='frozen-graph.pb',
                 rename_outputs=None):
    # Load checkpoint
    checkpoint = tf.train.get_checkpoint_state(model_folder)
    input_checkpoint = checkpoint.model_checkpoint_path
    output_graph = output_filename
    # Devices should be cleared to allow TensorFlow to control placement of
    # the graph when loading on different machines
    saver = tf.train.import_meta_graph(input_checkpoint + '.meta',
                                       clear_devices=True)
    graph = tf.get_default_graph()
    onames = output_nodes.split(',')
    # https://stackoverflow.com/a/34399966/4190475
    if rename_outputs is not None:
        nnames = rename_outputs.split(',')
        with graph.as_default():
            for o, n in zip(onames, nnames):
                _out = tf.identity(graph.get_tensor_by_name(o + ':0'), name=n)
        onames = nnames
    input_graph_def = graph.as_graph_def()
    # Fix batch norm nodes
    for node in input_graph_def.node:
        if node.op == 'RefSwitch':
            node.op = 'Switch'
            for index in range(len(node.input)):
                if 'moving_' in node.input[index]:
                    node.input[index] = node.input[index] + '/read'
        elif node.op == 'AssignSub':
            node.op = 'Sub'
            if 'use_locking' in node.attr:
                del node.attr['use_locking']
    with tf.Session(graph=graph) as sess:
        saver.restore(sess, input_checkpoint)
        # In production, graph weights no longer need to be updated;
        # graph_util provides a utility to change all variables to constants
        output_graph_def = graph_util.convert_variables_to_constants(
            sess, input_graph_def,
            onames  # unrelated nodes will be discarded
        )
        # Serialize and write to file
        with tf.gfile.GFile(output_graph, "wb") as f:
            f.write(output_graph_def.SerializeToString())
        print("%d ops in the final graph." % len(output_graph_def.node))
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Prune and freeze weights from checkpoints into production models')
    parser.add_argument("--checkpoint_path",
                        default='./regressionDir/',
                        type=str, help="Path to checkpoint files")
    parser.add_argument("--output_nodes",
                        default='Model/dense2/bias_add',
                        type=str, help="Names of output nodes, comma separated")
    parser.add_argument("--output_graph",
                        default='reg.pb',
                        type=str, help="Output graph filename")
    parser.add_argument("--rename_outputs",
                        default='out_vol',
                        type=str, help="Rename output nodes for better \
                        readability in production graph, to be specified in \
                        the same order as output_nodes")
    args = parser.parse_args()
    freeze_graph(args.checkpoint_path, args.output_nodes, args.output_graph, args.rename_outputs)
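For reference, the equivalent direct call with the values used in this question (assuming the training checkpoints are under ./ckpt/ as in the training script, and freeze_graph is the function defined in this script) would be:
freeze_graph('./ckpt/', output_nodes='Model/dense2/bias_add',
             output_filename='reg.pb', rename_outputs='out_vol')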
The test inference code:
import tensorflow as tf
import numpy as np
from PIL import Image
import time
gf = tf.GraphDef()
gf.ParseFromString(open('reg.pb', 'rb').read())
print([n.name + '=>' + n.op for n in gf.node])
output_graph_path = './reg.pb'
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    output_graph_def = tf.GraphDef()
    with open(output_graph_path, "rb") as f:
        output_graph_def.ParseFromString(f.read())
    _ = tf.import_graph_def(output_graph_def, name="")
    input_img = sess.graph.get_tensor_by_name("shuffle_batch:0")
    print(input_img)
    out_vol = sess.graph.get_tensor_by_name("out_vol:0")
    out_voln = sess.graph.get_tensor_by_name("shuffle_batch:0")
    a = np.random.random([48, 480, 224, 3])
    for x in range(100):
        ntime1 = time.time()
        vol = sess.run(out_vol, {input_img: a})
        ntime2 = time.time()
        print(ntime2 - ntime1)
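One way around the fixed-size shuffle_batch input, without rebuilding the net and restoring the params, is to remap the frozen input tensor onto a fresh placeholder while importing the graph. This is only a sketch: the tensor names shuffle_batch:0 and out_vol:0 come from the code above, and input_new is a hypothetical placeholder name.
import numpy as np
import tensorflow as tf

graph_def = tf.GraphDef()
with tf.gfile.GFile('reg.pb', 'rb') as f:
    graph_def.ParseFromString(f.read())

# Remap the frozen shuffle_batch tensor to a placeholder with a
# variable batch dimension, so any batch size can be fed.
input_new = tf.placeholder(tf.float32, [None, 480, 224, 3], name='input_new')
out_vol, = tf.import_graph_def(graph_def,
                               input_map={'shuffle_batch:0': input_new},
                               return_elements=['out_vol:0'],
                               name='')

with tf.Session() as sess:
    vol = sess.run(out_vol, {input_new: np.random.random([1, 480, 224, 3])})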

Related

Decoding tfrecord with tfslim

I use Python 2.7.13 and TensorFlow 1.3.0 on CPU.
I want to use DenseNet (https://github.com/pudae/tensorflow-densenet) for a regression problem. My data contains 60000 jpeg images with 37 float labels for each image.
I saved my data into tfrecords files by:
def Read_Labels(label_path):
    labels_csv = pd.read_csv(label_path)
    labels = np.array(labels_csv)
    return labels[:, 1:]
def load_image(addr):
    # read an image and resize to (224, 224)
    img = cv2.imread(addr)
    img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_CUBIC)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = img.astype(np.float32)
    return img
def Shuffle_images_with_labels(shuffle_data, photo_filenames, labels):
    if shuffle_data:
        c = list(zip(photo_filenames, labels))
        shuffle(c)
        addrs, labels = zip(*c)
    return addrs, labels
def image_to_tfexample_mine(image_data, image_format, height, width, label):
    return tf.train.Example(features=tf.train.Features(feature={
        'image/encoded': bytes_feature(image_data),
        'image/format': bytes_feature(image_format),
        'image/class/label': _float_feature(label),
        'image/height': int64_feature(height),
        'image/width': int64_feature(width),
    }))
def _convert_dataset(split_name, filenames, labels, dataset_dir):
    assert split_name in ['train', 'validation']
    num_per_shard = int(math.ceil(len(filenames) / float(_NUM_SHARDS)))
    with tf.Graph().as_default():
        for shard_id in range(_NUM_SHARDS):
            output_filename = _get_dataset_filename(dataset_path, split_name, shard_id)
            with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
                start_ndx = shard_id * num_per_shard
                end_ndx = min((shard_id + 1) * num_per_shard, len(filenames))
                for i in range(start_ndx, end_ndx):
                    sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
                        i + 1, len(filenames), shard_id))
                    sys.stdout.flush()
                    img = load_image(filenames[i])
                    image_data = tf.compat.as_bytes(img.tostring())
                    label = labels[i]
                    example = image_to_tfexample_mine(image_data, image_format, height, width, label)
                    # Serialize to string and write on the file
                    tfrecord_writer.write(example.SerializeToString())
    sys.stdout.write('\n')
    sys.stdout.flush()
def run(dataset_dir):
    labels = Read_Labels(dataset_dir + '/training_labels.csv')
    photo_filenames = _get_filenames_and_classes(dataset_dir + '/images_training')
    shuffle_data = True
    photo_filenames, labels = Shuffle_images_with_labels(
        shuffle_data, photo_filenames, labels)
    training_filenames = photo_filenames[_NUM_VALIDATION:]
    training_labels = labels[_NUM_VALIDATION:]
    validation_filenames = photo_filenames[:_NUM_VALIDATION]
    validation_labels = labels[:_NUM_VALIDATION]
    _convert_dataset('train',
                     training_filenames, training_labels, dataset_path)
    _convert_dataset('validation',
                     validation_filenames, validation_labels, dataset_path)
    print('\nFinished converting the Flowers dataset!')
And I decode it by:
with tf.Session() as sess:
    feature = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/class/label': tf.FixedLenFeature(
            [37, ], tf.float32, default_value=tf.zeros([37, ], dtype=tf.float32)),
    }
    filename_queue = tf.train.string_input_producer([data_path], num_epochs=1)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(serialized_example, features=feature)
    image = tf.decode_raw(features['image/encoded'], tf.float32)
    print(image.get_shape())
    label = tf.cast(features['image/class/label'], tf.float32)
    image = tf.reshape(image, [224, 224, 3])
    images, labels = tf.train.shuffle_batch([image, label], batch_size=10, capacity=30, num_threads=1, min_after_dequeue=10)
    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    sess.run(init_op)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    for batch_index in range(6):
        img, lbl = sess.run([images, labels])
        img = img.astype(np.uint8)
        print(img.shape)
        for j in range(6):
            plt.subplot(2, 3, j + 1)
            plt.imshow(img[j, ...])
        plt.show()
    coord.request_stop()
    coord.join(threads)
It's all fine up to this point. But when I use the commands below for decoding the TFRecord files:
reader = tf.TFRecordReader
keys_to_features = {
    'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
    'image/format': tf.FixedLenFeature((), tf.string, default_value='raw'),
    'image/class/label': tf.FixedLenFeature(
        [37, ], tf.float32, default_value=tf.zeros([37, ], dtype=tf.float32)),
}
items_to_handlers = {
    'image': slim.tfexample_decoder.Image('image/encoded'),
    'label': slim.tfexample_decoder.Tensor('image/class/label'),
}
decoder = slim.tfexample_decoder.TFExampleDecoder(
    keys_to_features, items_to_handlers)
I get the following error.
INFO:tensorflow:Error reported to Coordinator: , assertion failed: [Unable to decode bytes as JPEG, PNG, GIF, or BMP]
[[Node: case/If_0/decode_image/cond_jpeg/cond_png/cond_gif/Assert_1/Assert = Assert[T=[DT_STRING], summarize=3, _device="/job:localhost/replica:0/task:0/cpu:0"](case/If_0/decode_image/cond_jpeg/cond_png/cond_gif/is_bmp, case/If_0/decode_image/cond_jpeg/cond_png/cond_gif/Assert_1/Assert/data_0)]]
INFO:tensorflow:Caught OutOfRangeError. Stopping Training.
INFO:tensorflow:Finished training! Saving model to disk.
To use DenseNet for my problem, I need to fix this error first.
Could anybody please help me out with this problem? This code works perfectly for datasets like Flowers, MNIST and CIFAR-10 available at https://github.com/pudae/tensorflow-densenet/tree/master/datasets, but does not work for my data.
Thanks to pudae, the problem is solved. I needed to use:
image_data = tf.gfile.FastGFile(filenames[i], 'rb').read()
instead of the following for loading the data:
img = load_image(filenames[i])
image_data = tf.compat.as_bytes(img.tostring())
It works perfectly now.
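In other words, the writing loop inside _convert_dataset stores the still-encoded JPEG bytes rather than a decoded pixel array, so slim's image decoder has something it can actually decode. A sketch of the changed loop (image_format becomes b'jpeg' here):
for i in range(start_ndx, end_ndx):
    # Read the raw, still-encoded JPEG bytes straight from disk.
    image_data = tf.gfile.FastGFile(filenames[i], 'rb').read()
    example = image_to_tfexample_mine(image_data, b'jpeg', height, width, labels[i])
    tfrecord_writer.write(example.SerializeToString())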
According to the error, I think the problem is that you are using an image decoder on array data (already-decoded data), because you saved decoded pixel data when creating the TFRecords. You may have noticed that when you are not using slim, you use tf.decode_raw to decode the data. But when you use slim, 'image/format': tf.FixedLenFeature((), tf.string, default_value='raw') is not used, and by default slim will use its image decoder.
I believe you are using the code in slim/data, where format_key = 'image/format' is what you need. So, like this:
keys_to_features = {
    'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
    'image/format': tf.FixedLenFeature((), tf.string, default_value='raw'),
    'image/class/label': tf.FixedLenFeature(
        [1], tf.int64, default_value=tf.zeros([1], dtype=tf.int64)),
}
items_to_handlers = {
    'image': tfexample_decoder.Image(
        image_key='image/encoded',
        format_key='image/format'),
    'label': tfexample_decoder.Tensor('image/class/label'),
}
decoder = tfexample_decoder.TFExampleDecoder(
    keys_to_features, items_to_handlers)
But I am not sure this can solve your problem perfectly, because I can't reproduce your work on my machine.
Maybe there is a problem with the image itself.

Tensorflow, read tfrecord without a graph

I tried to write a well-structured neural network model with TensorFlow. But I hit a problem feeding the data from tfrecord files into the graph. The code is below; it hangs at the following function. How can I make it work?
images, labels = network.load_tfrecord_data(1)
Why can this function not get the features (images) and labels from my data file, .tfrecords?
Any ideas would be appreciated.
from __future__ import division
from __future__ import print_function
import datetime
import numpy as np
import tensorflow as tf
layers = tf.contrib.layers
losses = tf.contrib.losses
metrics = tf.contrib.metrics
LABELS = 10
WIDTH = 28
HEIGHT = 28
HIDDEN = 100
def read_and_decode_single_example(filename):
    filename_queue = tf.train.string_input_producer([filename], num_epochs=None)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'label': tf.FixedLenFeature([], tf.int64),
            'image': tf.FixedLenFeature([50176], tf.int64)
        })
    label = features['label']
    image = features['image']
    image = tf.reshape(image, [-1, 224, 224, 1])
    label = tf.one_hot(label - 1, 11, dtype=tf.int64)
    return label, image
class Network:
    def __init__(self, logdir, experiment, threads):
        # Construct the graph
        with tf.name_scope("inputs"):
            self.images = tf.placeholder(tf.float32, [None, WIDTH, HEIGHT, 1], name="images")
            self.labels = tf.placeholder(tf.int64, [None], name="labels")
            # self.keep_prob = keep_prob
            self.keep_prob = tf.placeholder(tf.float32, name="keep_prob")
        flattened_images = layers.flatten(self.images)
        hidden_layer = layers.fully_connected(flattened_images, num_outputs=HIDDEN, activation_fn=tf.nn.relu, scope="hidden_layer")
        output_layer = layers.fully_connected(hidden_layer, num_outputs=LABELS, activation_fn=None, scope="output_layer")
        loss = losses.sparse_softmax_cross_entropy(labels=self.labels, logits=output_layer, scope="loss")
        self.training = layers.optimize_loss(loss, None, None, tf.train.AdamOptimizer(), summaries=['loss', 'gradients', 'gradient_norm'], name='training')
        with tf.name_scope("accuracy"):
            predictions = tf.argmax(output_layer, 1, name="predictions")
            accuracy = metrics.accuracy(predictions, self.labels)
            tf.summary.scalar("training/accuracy", accuracy)
        self.accuracy = metrics.accuracy(predictions, self.labels)
        with tf.name_scope("confusion_matrix"):
            confusion_matrix = metrics.confusion_matrix(predictions, self.labels, weights=tf.not_equal(predictions, self.labels), dtype=tf.float32)
            confusion_image = tf.reshape(confusion_matrix, [1, LABELS, LABELS, 1])
        # Summaries
        self.summaries = {'training': tf.summary.merge_all()}
        for dataset in ["dev", "test"]:
            self.summaries[dataset] = tf.summary.scalar(dataset + "/loss", loss)
            self.summaries[dataset] = tf.summary.scalar(dataset + "/accuracy", accuracy)
            self.summaries[dataset] = tf.summary.image(dataset + "/confusion_matrix", confusion_image)
        # Create the session
        self.session = tf.Session(config=tf.ConfigProto(inter_op_parallelism_threads=threads,
                                                        intra_op_parallelism_threads=threads))
        self.session.run(tf.global_variables_initializer())
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
        self.summary_writer = tf.summary.FileWriter("{}/{}-{}".format(logdir, timestamp, experiment), graph=self.session.graph, flush_secs=10)
        self.steps = 0

    def train(self, images, labels, keep_prob):
        self.steps += 1
        feed_dict = {self.images: self.session.run(images), self.labels: self.session.run(labels), self.keep_prob: keep_prob}
        if self.steps == 1:
            metadata = tf.RunMetadata()
            self.session.run(self.training, feed_dict, options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE), run_metadata=metadata)
            self.summary_writer.add_run_metadata(metadata, 'step1')
        elif self.steps % 100 == 0:
            _, summary = self.session.run([self.training, self.summaries['training']], feed_dict)
            self.summary_writer.add_summary(summary, self.steps)
        else:
            self.session.run(self.training, feed_dict)

    def evaluate(self, dataset, images, labels):
        feed_dict = {self.images: images, self.labels: labels, self.keep_prob: 1}
        summary = self.summaries[dataset].eval({self.images: images, self.labels: labels, self.keep_prob: 1}, self.session)
        self.summary_writer.add_summary(summary, self.steps)

    def load_tfrecord_data(self, training):
        training = training
        if training:
            label, image = read_and_decode_single_example("mhad_Op_train.tfrecords")
            # print(self.session.run(image))
        else:
            label, image = read_and_decode_single_example("mhad_Op_test.tfrecords")
        # image = tf.cast(image, tf.float32) / 255.
        images_batch, labels_batch = tf.train.shuffle_batch(
            [image, label], batch_size=50, num_threads=2,
            capacity=80,
            min_after_dequeue=30)
        return images_batch, labels_batch
if __name__ == '__main__':
    # Fix random seed
    np.random.seed(42)
    tf.set_random_seed(42)
    # Parse arguments
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', default=256, type=int, help='Batch size.')
    parser.add_argument('--epochs', default=50, type=int, help='Number of epochs.')
    parser.add_argument('--logdir', default="logs", type=str, help='Logdir name.')
    parser.add_argument('--exp', default="mnist-final-confusion_matrix_customized_loss", type=str, help='Experiment name.')
    parser.add_argument('--threads', default=1, type=int, help='Maximum number of threads to use.')
    args = parser.parse_args()
    # Load the data
    keep_prob = 1
    # Construct the network
    network = Network(logdir=args.logdir, experiment=args.exp, threads=args.threads)
    # Train
    for i in range(args.epochs):
        images, labels = network.load_tfrecord_data(1)
        network.train(images, labels, keep_prob)
        print('current epoch', i)
You need to start the queue runners before using images and labels in your model:
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    images, labels = network.load_tfrecord_data(1)
    ...
    coord.request_stop()
    coord.join(threads)
Check this tutorial for a full example
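A slightly fuller sketch of the same pattern, assuming the Network class above (its session lives in network.session): build the input ops once, start the queue-runner threads on that session, and stop them even if training raises:
images, labels = network.load_tfrecord_data(1)  # build the input pipeline once

coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=network.session, coord=coord)
try:
    for i in range(args.epochs):
        network.train(images, labels, keep_prob)
        print('current epoch', i)
except tf.errors.OutOfRangeError:
    print('Input queue exhausted.')
finally:
    coord.request_stop()
    coord.join(threads)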

make tf.Estimator use default graph

I am trying to make use of the TensorFlow protobuf feeding pipeline. The easiest way seemed to be to use tf.estimator.Estimator with tf.contrib.data.TFRecordDataset. However, I came across the issue that it creates a new Graph in spite of being launched within with g.as_default(). In the following code I see that both the model tensors and the tensors returned by the TFRecordDataset are the same before I feed them to the Estimator, but become different within the Estimator. Any ideas how to put them on the same graph?
# coding: utf-8
import sys
import tensorflow as tf
from keras.applications.inception_v3 import InceptionV3
import numpy as np
final_activation='linear'
g = tf.Graph()
with g.as_default():
    model = InceptionV3(weights='imagenet',
                        include_top=True,
                        input_tensor=None,
                        input_shape=None,
                        pooling=None,
                        classes=1000)
def model_fn(mode, features, labels, params):
    optimizer = params["optimizer"]
    opt_params = params.get("opt_params", {})
    predictions = model(features)
    if (mode == tf.estimator.ModeKeys.TRAIN or
            mode == tf.estimator.ModeKeys.EVAL):
        loss = tf.contrib.keras.backend.categorical_crossentropy(predictions, labels)
        # loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logyhat)
    else:
        loss = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = getattr(tf.train, optimizer)
        train_op = optimizer(opt_params).minimize(loss)
    else:
        train_op = None
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op)
def parser(record):
    keys_to_features = {
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'image_raw': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64)
    }
    features = tf.parse_single_example(
        record,
        features=keys_to_features)
    # Convert from a scalar string tensor to a uint8 tensor
    image = tf.decode_raw(features['image_raw'], tf.float32)
    height = tf.cast(features['height'], tf.int32)
    width = tf.cast(features['width'], tf.int32)
    image_shape = tf.stack([height, width, 3])
    image = tf.reshape(image, image_shape)
    label = tf.cast(features["label"], tf.int32)
    return image, label
def get_dataset_inp_fn(filenames, epochs=20):
    def dataset_input_fn():
        dataset = tf.contrib.data.TFRecordDataset(filenames)
        # Use `Dataset.map()` to build a pair of a feature dictionary and a label
        # tensor for each example.
        dataset = dataset.map(parser)
        dataset = dataset.shuffle(buffer_size=10000)
        dataset = dataset.batch(32)
        dataset = dataset.repeat(epochs)
        iterator = dataset.make_one_shot_iterator()
        features, labels = iterator.get_next()
        return features, labels
    return dataset_input_fn
inpfun = get_dataset_inp_fn(["mydataset.tfrecords"], epochs=20)
x,y = inpfun()
print("X", x.graph)
print("DEFAULT", g)
print("MODEL", model.input.graph)
# everything is on the same graph
if not x.graph is tf.get_default_graph():
    raise ValueError()
with tf.Session(graph=g) as sess:
    est = tf.estimator.Estimator(
        model_fn,
        model_dir=None,
        config=None,
        params={"optimizer": "AdamOptimizer",
                "opt_params": {}}
    )
    est.train(inpfun)
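For what it's worth, tf.estimator.Estimator deliberately constructs its own new graph inside train(), so anything built outside model_fn (like the InceptionV3 model above) can never end up on it. A minimal sketch of the usual fix is to build the model inside model_fn itself, so its variables are created on whichever graph the Estimator provides:
def model_fn(mode, features, labels, params):
    # Build the model here so its variables and ops land on the
    # graph that the Estimator creates for this call.
    model = InceptionV3(weights='imagenet', include_top=True)
    predictions = model(features)
    loss, train_op = None, None
    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
        loss = tf.reduce_mean(
            tf.contrib.keras.backend.categorical_crossentropy(predictions, labels))
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = getattr(tf.train, params["optimizer"])
        train_op = optimizer(**params.get("opt_params", {})).minimize(
            loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions,
                                      loss=loss, train_op=train_op)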

Dataset input from bmp images only 50% accurate

I've created this graph to try to:
1. Import BMP files and generate labels based on their filenames (L/R).
2. Train a network to distinguish between the left and right eye.
3. Evaluate the network.
I'm using the new framework and getting it all in as a dataset. The code runs, but I only get 50% accuracy (no learning happening).
Can anyone check that the graph is right and that it's just my network I need to fix?
""" Routine for processing Eye Image dataset
determines left/right eye
Using Tensorflow API v1.3
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import fnmatch
import tensorflow as tf
from six.moves import xrange # pylint: disable=redefined-builtin
import nnLayers as nnLayer
IMAGE_SIZE = 460
SCALE_SIZE = 100
NUM_CLASSES = 2
IMAGE_DEPTH = 3
FLAGS = tf.app.flags.FLAGS
# Basic model parameters.
tf.app.flags.DEFINE_integer('batch_size', 200,
                            """Number of images to process in a batch.""")
tf.app.flags.DEFINE_integer('num_epochs', 1001,
                            """Number of epochs to run.""")
tf.app.flags.DEFINE_string('train_directory', './eyeImages',
                           """directory of images to process.""")
tf.app.flags.DEFINE_string('test_directory', './eyeTest',
                           """directory of images to process.""")
tf.app.flags.DEFINE_string('log_dir', './logs',
                           """logging directory""")
def _parse_function(filename, label):
    """Takes filenames and labels and returns
    one hot labels and image values"""
    # read the file
    image_string = tf.read_file(filename)
    # decode the BMP file
    image_decoded = tf.image.decode_bmp(image_string)
    # resize accordingly
    image = tf.image.resize_images(image_decoded, [SCALE_SIZE, SCALE_SIZE])
    # convert label to one hot
    one_hot = tf.one_hot(label, NUM_CLASSES)
    return image, one_hot
def inference(image):
    # shape image for convolution
    with tf.name_scope('input_reshape'):
        x_image = tf.reshape(image, [-1, SCALE_SIZE, SCALE_SIZE, IMAGE_DEPTH])  # infer number of images, last dimension is features
        tf.summary.image('input_images', x_image)
    # neural net layers
    # 100x100x3 -> 50x50x32
    h_pool1 = nnLayer.conv_layer(x_image, IMAGE_DEPTH, 5, 32, 'hiddenLayer1', act=tf.nn.relu)
    # 50x50x32 -> 25x25x64
    h_pool2 = nnLayer.conv_layer(h_pool1, 32, 5, 64, 'hiddenLayer2', act=tf.nn.relu)
    # 25x25x64 -> 1024x2
    h_fc1 = nnLayer.fc_layer(h_pool2, 64, 25, 1024, 'fcLayer1', act=tf.nn.relu)
    # 1024x2 -> 1x2
    with tf.name_scope('final-layer'):
        with tf.name_scope('weights'):
            W_fc2 = nnLayer.weight_variable([1024, NUM_CLASSES])
        with tf.name_scope('biases'):
            b_fc2 = nnLayer.bias_variable([NUM_CLASSES])
        y_conv = tf.matmul(h_fc1, W_fc2) + b_fc2
    return y_conv
def folderParser(folder):
    """output BMP file names in directory and
    label based on file name"""
    # create list of filenames in directory
    files = os.listdir(folder)
    # filter for BMP files
    bmpfiles = fnmatch.filter(files, '*.bmp')
    # create empty lists
    labels = []
    fullNames = []
    # get the length of the filename and determine left/right label
    for i in range(len(bmpfiles)):
        length = len(bmpfiles[i])
        fullNames.append(folder + '/' + bmpfiles[i])
        if (bmpfiles[i][length - 17]) == 'L':
            labels.append(1)
        else:
            labels.append(0)
    return fullNames, labels
def main(argv=None):  # pylint: disable=unused-argument
    # delete the log files if present
    # if tf.gfile.Exists(FLAGS.log_dir):
    #     tf.gfile.DeleteRecursively(FLAGS.log_dir)
    # tf.gfile.MakeDirs(FLAGS.log_dir)
    # get file names and labels
    trainNames, trainLabels = folderParser(FLAGS.train_directory)
    testNames, testLabels = folderParser(FLAGS.test_directory)
    # create a dataset of the file names and labels
    tr_data = tf.contrib.data.Dataset.from_tensor_slices((trainNames, trainLabels))
    ts_data = tf.contrib.data.Dataset.from_tensor_slices((testNames, testLabels))
    # map the dataset from file names to images
    tr_data = tr_data.map(_parse_function)
    ts_data = ts_data.map(_parse_function)
    # shuffle the images
    tr_data = tr_data.shuffle(FLAGS.batch_size * 2)
    ts_data = ts_data.shuffle(FLAGS.batch_size * 2)
    # create batches
    tr_data = tr_data.batch(FLAGS.batch_size)
    ts_data = ts_data.batch(FLAGS.batch_size)
    # create handle for datasets
    handle = tf.placeholder(tf.string, shape=[])
    iterator = tf.contrib.data.Iterator.from_string_handle(handle, tr_data.output_types, tr_data.output_shapes)
    next_element = iterator.get_next()
    # setup iterators
    training_iterator = tr_data.make_initializable_iterator()
    validation_iterator = ts_data.make_initializable_iterator()
    # retrieve next batch
    features, labels = iterator.get_next()
    # run network
    y_conv = inference(features)
    # determine softmax and loss function
    with tf.variable_scope('softmax_linear') as scope:
        diff = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=y_conv)
        with tf.name_scope('total'):
            cross_entropy = tf.reduce_mean(diff)
    tf.summary.scalar('cross_entropy', cross_entropy)
    # run gradient descent
    with tf.name_scope('train'):
        training_op = tf.train.GradientDescentOptimizer(1e-3).minimize(cross_entropy)
    # identify correct predictions
    with tf.name_scope('correct_prediction'):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(labels, 1))
    # find the accuracy of the model
    with tf.name_scope('accuracy'):
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar('accuracy', accuracy)
    with tf.Session() as sess:
        # initialization of the variables
        training_handle = sess.run(training_iterator.string_handle())
        validation_handle = sess.run(validation_iterator.string_handle())
        sess.run(tf.global_variables_initializer())
        # merge all the summaries and write test summaries
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train', sess.graph)
        test_writer = tf.summary.FileWriter(FLAGS.log_dir + '/test')
        # run through epochs
        for epoch in range(FLAGS.num_epochs):
            # initialize the training set for the training epoch
            sess.run(training_iterator.initializer)
            if epoch % 2 == 0:
                # initialize validation set
                sess.run(validation_iterator.initializer)
                # test
                summary, acc = sess.run([merged, accuracy], feed_dict={handle: validation_handle})
                train_writer.add_summary(summary, epoch)  # write to test file
                print('step %s, accuracy %s' % (epoch, acc))
            else:
                # train
                sess.run(training_op, feed_dict={handle: training_handle})
        # close the log files
        train_writer.close()
        test_writer.close()

if __name__ == '__main__':
    tf.app.run()
Aaron
The answer was image standardization:
image_std = tf.image.per_image_standardization(image_resized)
Without the image standardization the neurons were becoming saturated. It improved the outcome straight away.
Thanks.
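Folded back into the pipeline above, the natural place for the standardization is inside _parse_function; a sketch of that change:
def _parse_function(filename, label):
    image_string = tf.read_file(filename)
    image_decoded = tf.image.decode_bmp(image_string)
    image_resized = tf.image.resize_images(image_decoded, [SCALE_SIZE, SCALE_SIZE])
    # Standardize to zero mean / unit variance so raw 0-255 pixel
    # values don't saturate the units.
    image = tf.image.per_image_standardization(image_resized)
    one_hot = tf.one_hot(label, NUM_CLASSES)
    return image, one_hot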

No variable to save error in Tensorflow

I am trying to save the model and then reuse it for classifying my images, but unfortunately I am getting errors in restoring the model that I have saved.
The code in which the model has been created:
# Deep Learning
# =============
#
# Assignment 4
# ------------
# In[25]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range
# In[37]:
pickle_file = 'notMNIST.pickle'
with open(pickle_file, 'rb') as f:
    save = pickle.load(f)
    train_dataset = save['train_dataset']
    train_labels = save['train_labels']
    valid_dataset = save['valid_dataset']
    valid_labels = save['valid_labels']
    test_dataset = save['test_dataset']
    test_labels = save['test_labels']
    del save  # hint to help gc free up memory
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
print(test_labels)
# Reformat into a TensorFlow-friendly shape:
# - convolutions need the image data formatted as a cube (width by height by #channels)
# - labels as float 1-hot encodings.
# In[38]:
image_size = 28
num_labels = 10
num_channels = 1 # grayscale
import numpy as np
def reformat(dataset, labels):
    dataset = dataset.reshape(
        (-1, image_size, image_size, num_channels)).astype(np.float32)
    # print(np.arange(num_labels))
    labels = (np.arange(num_labels) == labels[:, None]).astype(np.float32)
    # print(labels[0,:])
    print(labels[0])
    return dataset, labels
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
#print(labels[0])
# In[39]:
def accuracy(predictions, labels):
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
            / predictions.shape[0])
# Let's build a small network with two convolutional layers, followed by one fully connected layer. Convolutional networks are more expensive computationally, so we'll limit its depth and number of fully connected nodes.
# In[47]:
batch_size = 16
patch_size = 5
depth = 16
num_hidden = 64
graph = tf.Graph()
with graph.as_default():
    # Input data.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    # Variables.
    layer1_weights = tf.Variable(tf.truncated_normal(
        [patch_size, patch_size, num_channels, depth], stddev=0.1), name="layer1_weights")
    layer1_biases = tf.Variable(tf.zeros([depth]), name="layer1_biases")
    layer2_weights = tf.Variable(tf.truncated_normal(
        [patch_size, patch_size, depth, depth], stddev=0.1), name="layer2_weights")
    layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]), name="layer2_biases")
    layer3_weights = tf.Variable(tf.truncated_normal(
        [image_size // 4 * image_size // 4 * depth, num_hidden], stddev=0.1), name="layer3_biases")
    layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]), name="layer3_biases")
    layer4_weights = tf.Variable(tf.truncated_normal(
        [num_hidden, num_labels], stddev=0.1), name="layer4_weights")
    layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]), name="layer4_biases")

    # Model.
    def model(data):
        conv = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1], padding='SAME')
        hidden = tf.nn.relu(conv + layer1_biases)
        conv = tf.nn.conv2d(hidden, layer2_weights, [1, 2, 2, 1], padding='SAME')
        hidden = tf.nn.relu(conv + layer2_biases)
        shape = hidden.get_shape().as_list()
        reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
        hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
        return tf.matmul(hidden, layer4_weights) + layer4_biases

    # Training computation.
    logits = model(tf_train_dataset)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))
    # Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
    test_prediction = tf.nn.softmax(model(tf_test_dataset))
# In[48]:
num_steps = 1001
#saver = tf.train.Saver()
with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print('Initialized')
    for step in range(num_steps):
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
        _, l, predictions = session.run(
            [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if (step % 50 == 0):
            print('Minibatch loss at step %d: %f' % (step, l))
            print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
            print('Validation accuracy: %.1f%%' % accuracy(
                valid_prediction.eval(), valid_labels))
    print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))
    save_path = tf.train.Saver().save(session, "/tmp/model.ckpt")
    print("Model saved in file: %s" % save_path)
Everything works fine and the model is stored in the respective folder.
I have created one more Python file where I have tried restoring the model, but I am getting an error there:
# In[1]:
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range
# In[3]:
image_size = 28
num_labels = 10
num_channels = 1 # grayscale
import numpy as np
# In[4]:
def accuracy(predictions, labels):
return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
/ predictions.shape[0])
# In[8]:
batch_size = 16
patch_size = 5
depth = 16
num_hidden = 64
graph = tf.Graph()
with graph.as_default():
    '''# Input data.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)'''
    # Variables.
    layer1_weights = tf.Variable(tf.truncated_normal(
        [patch_size, patch_size, num_channels, depth], stddev=0.1), name="layer1_weights")
    layer1_biases = tf.Variable(tf.zeros([depth]), name="layer1_biases")
    layer2_weights = tf.Variable(tf.truncated_normal(
        [patch_size, patch_size, depth, depth], stddev=0.1), name="layer2_weights")
    layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]), name="layer2_biases")
    layer3_weights = tf.Variable(tf.truncated_normal(
        [image_size // 4 * image_size // 4 * depth, num_hidden], stddev=0.1), name="layer3_biases")
    layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]), name="layer3_biases")
    layer4_weights = tf.Variable(tf.truncated_normal(
        [num_hidden, num_labels], stddev=0.1), name="layer4_weights")
    layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]), name="layer4_biases")

    # Model.
    def model(data):
        conv = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1], padding='SAME')
        hidden = tf.nn.relu(conv + layer1_biases)
        conv = tf.nn.conv2d(hidden, layer2_weights, [1, 2, 2, 1], padding='SAME')
        hidden = tf.nn.relu(conv + layer2_biases)
        shape = hidden.get_shape().as_list()
        reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
        hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
        return tf.matmul(hidden, layer4_weights) + layer4_biases

    '''# Training computation.
    logits = model(tf_train_dataset)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))
    # Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)'''
    # Predictions for the training, validation, and test data.
    # train_prediction = tf.nn.softmax(logits)
    # valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
    # test_prediction = tf.nn.softmax(model(tf_test_dataset))
# In[17]:
#saver = tf.train.Saver()
with tf.Session() as sess:
    # Restore variables from disk.
    tf.train.Saver().restore(sess, "/tmp/model.ckpt")
    print("Model restored.")
    # Do some work with the model
The error that I am getting is:
No variables to save
Any help would be appreciated.
The error here is quite subtle. In In[8] you create a tf.Graph called graph and set it as default for the with graph.as_default(): block. This means that all of the variables are created in graph, and if you print graph.all_variables() you should see a list of your variables.
However, you exit the with block before creating (i) the tf.Session, and (ii) the tf.train.Saver. This means that the session and saver are created in a different graph (the global default tf.Graph that is used when you don't explicitly create one and set it as default), which doesn't contain any variables—or any nodes at all.
There are at least two solutions:
1. As Yaroslav suggests, you can write your program without using the with graph.as_default(): block, which avoids the confusion with multiple graphs. However, this can lead to name collisions between different cells in your IPython notebook, which is awkward when using the tf.train.Saver, since it uses the name property of a tf.Variable as the key in the checkpoint file.
2. You can create the saver inside the with graph.as_default(): block, and create the tf.Session with an explicit graph, as follows:
with graph.as_default():
    # [Variable and model creation goes here.]
    saver = tf.train.Saver()  # Gets all variables in `graph`.

with tf.Session(graph=graph) as sess:
    saver.restore(sess, "/tmp/model.ckpt")  # restore() needs the checkpoint path
    # Do some work with the model....
Alternatively, you can create the tf.Session inside the with graph.as_default(): block, in which case it will use graph for all of its operations.
You are creating a new session in In[17], which wipes your variables. Also, you don't need to use with blocks if you only have one default graph and one default session; you can instead do something like this:
sess = tf.InteractiveSession()
layer1_weights = tf.Variable(tf.truncated_normal(
    [patch_size, patch_size, num_channels, depth], stddev=0.1), name="layer1_weights")
tf.train.Saver().restore(sess, "/tmp/model.ckpt")