How to freeze a TensorFlow ckpt trained with a QueueRunner? - tensorflow
I trained with tf.train.shuffle_batch and no placeholders, so when I froze the ckpt to a .pb file there was no input tensor. I can feed the shuffle_batch tensor directly, but then the feed has to match the fixed batch shape of the shuffle_batch output. How can I fix this? I know I could rebuild the network with a placeholder, restore the parameters, and then freeze, but that seems wasteful. Is there a better way?
Training code
import tensorflow as tf
import tensorlayer as tl
from tensorlayer.layers import *
import os
trainsize=35680
testsize=889
batch_size=32
inputW=224
inputH=480
TRAIN_TFRECORD='./train.tfrecords'
TEST_TFRECORD='./test.tfrecords'
BATCH_CAPACITY=512
MIN_AFTER_DEQU=256
MAX_Cycle=100000
TRAIN_CYCLE=int(trainsize/batch_size)
TEST_CYCLE=int(testsize/batch_size)
learning_rt = 0.001
savepath='./ckpt/'
logpath='./logs/'
def network(inputs, is_train, reuse):
    BITW = 8
    BITA = 8
    Decay = 0.99
    Epsi = 1e-5
    with tf.variable_scope('Model', reuse=reuse):
        net = InputLayer(inputs, name='input')  # 224*480
        net = QuanConv2dWithBN(net, 32, (3, 3), (1, 1), 'SAME', tf.nn.relu, decay=Decay, epsilon=Epsi, is_train=is_train, bitW=BITW, bitA=BITA, name='Conv1_1')
        net = QuanConv2dWithBN(net, 64, (3, 3), (2, 2), 'SAME', tf.nn.relu, decay=Decay, epsilon=Epsi, is_train=is_train, bitW=BITW, bitA=BITA, name='Conv1_2')  # 112*240
        net = QuanConv2dWithBN(net, 64, (3, 3), (1, 1), 'SAME', tf.nn.relu, decay=Decay, epsilon=Epsi, is_train=is_train, bitW=BITW, bitA=BITA, name='Conv2_1')
        net = QuanConv2dWithBN(net, 128, (3, 3), (2, 2), 'SAME', tf.nn.relu, decay=Decay, epsilon=Epsi, is_train=is_train, bitW=BITW, bitA=BITA, name='Conv2_2')  # 56*120
        net = QuanConv2dWithBN(net, 128, (3, 3), (1, 1), 'SAME', tf.nn.relu, decay=Decay, epsilon=Epsi, is_train=is_train, bitW=BITW, bitA=BITA, name='Conv3_1')
        net = QuanConv2dWithBN(net, 64, (1, 1), (1, 1), 'SAME', tf.nn.relu, decay=Decay, epsilon=Epsi, is_train=is_train, bitW=BITW, bitA=BITA, name='Conv3_2')
        net = QuanConv2dWithBN(net, 128, (3, 3), (2, 2), 'SAME', tf.nn.relu, decay=Decay, epsilon=Epsi, is_train=is_train, bitW=BITW, bitA=BITA, name='Conv3_3')  # 28*60
        net = QuanConv2dWithBN(net, 64, (3, 3), (1, 1), 'SAME', tf.nn.relu, decay=Decay, epsilon=Epsi, is_train=is_train, bitW=BITW, bitA=BITA, name='Conv4_1')
        net = QuanConv2dWithBN(net, 96, (3, 3), (2, 2), 'VALID', tf.nn.relu, decay=Decay, epsilon=Epsi, is_train=is_train, bitW=BITW, bitA=BITA, name='Conv4_2')  # 14*30
        print(net.outputs)
        net = QuanConv2dWithBN(net, 128, (3, 3), (2, 2), 'SAME', tf.nn.relu, decay=Decay, epsilon=Epsi, is_train=is_train, bitW=BITW, bitA=BITA, name='Conv5_1')  # 7*30
        net = QuanConv2dWithBN(net, 128, (3, 3), (1, 2), 'VALID', tf.nn.relu, decay=Decay, epsilon=Epsi, is_train=is_train, bitW=BITW, bitA=BITA, name='Conv5_2')  # 3*30
        net = QuanConv2d(net, 128, (3, 3), (1, 2), 'VALID', tf.nn.leaky_relu, bitW=BITW, bitA=BITA, name='Conv5_3')  # 1*30
        print(net.outputs)
        net = FlattenLayer(net, name='flat1')
        net = QuanDenseLayer(net, 128, act=tf.nn.leaky_relu, bitW=BITW, bitA=BITA, name='dense1')
        net = DropoutLayer(net, 0.5, is_fix=True, is_train=is_train, name='drop1')
        net = DenseLayer(net, 1, name='dense2')
        outnet = net
        volcume = net.outputs
        print(volcume)
    return outnet, net.outputs, volcume

def inference(inputs, is_train, reuse):
    return network(inputs, is_train, reuse)
def read_and_decode(filename):
    filename_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'img': tf.FixedLenFeature([], tf.string),
            'num': tf.FixedLenFeature([], tf.float32),
        })
    img = tf.decode_raw(features['img'], tf.uint8)
    img = tf.reshape(img, [480, 240, 3])
    img = tf.random_crop(img, [480, 224, 3])
    img = tf.image.random_brightness(img, max_delta=0.3)
    img = tf.image.random_contrast(img, lower=0.1, upper=0.5)
    # img = tf.image.random_hue(img, max_delta=0.1)
    # img = tf.image.random_saturation(img, lower=0, upper=2.5)
    img = tf.image.per_image_standardization(img)
    label = tf.reshape(tf.cast(features['num'], tf.float32) * (1. / 230.) - 0.5, [1])
    return img, label

def read_and_decode_test(filename):
    filename_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'img': tf.FixedLenFeature([], tf.string),
            'num': tf.FixedLenFeature([], tf.float32),
        })
    img = tf.decode_raw(features['img'], tf.uint8)
    img = tf.reshape(img, [480, 240, 3])
    img = img[:, 8:232, :]  # deterministic center crop, no augmentation
    img = tf.image.per_image_standardization(img)
    label = tf.reshape(tf.cast(features['num'], tf.float32) * (1. / 230.) - 0.5, [1])
    return img, label

def smooth_L1(x):
    return tf.where(tf.less_equal(tf.abs(x), 1.0),
                    tf.multiply(0.5, tf.pow(x, 2.0)),
                    tf.subtract(tf.abs(x), 0.5))

def cal_loss(logits, labels):
    # return tf.clip_by_value(tf.reduce_mean(tf.losses.mean_squared_error(labels, logits)), 0.000001, 10000000.)
    # squared error with a dead zone: errors below 0.02 contribute a small constant
    return tf.reduce_mean(tf.where(tf.less_equal(tf.abs(logits - labels), 0.02),
                                   0.00001 * tf.ones_like(logits - labels),
                                   tf.multiply(1., tf.pow(logits - labels, 2.0))))
    # return tf.clip_by_value(tf.reduce_sum(smooth_L1(labels - logits)), 0.0000001, 100.)

def cal_acc(logits, labels):
    # fraction of predictions within 0.1 of the label
    return tf.reduce_mean(tf.cast(tf.less_equal(tf.abs(labels - logits), tf.ones_like(labels) * .1), tf.float32))
if __name__ == '__main__':
    img_train, num_train = read_and_decode(TRAIN_TFRECORD)
    # use the test decode path for the test set (center crop, no augmentation)
    img_test, num_test = read_and_decode_test(TEST_TFRECORD)
    img_train_batch, num_train_batch = tf.train.shuffle_batch(
        [img_train, num_train], batch_size=batch_size, capacity=BATCH_CAPACITY,
        min_after_dequeue=MIN_AFTER_DEQU)
    img_test_batch, num_test_batch = tf.train.batch(
        [img_test, num_test], batch_size=batch_size)
    net, _, logits_train = inference(img_train_batch, True, None)
    _, _, logits_test = inference(img_test_batch, False, True)
    loss_train = cal_loss(logits_train, num_train_batch)
    loss_test = cal_loss(logits_test, num_test_batch)
    acc_test = cal_acc(logits_test, num_test_batch)
    acc_train = cal_acc(logits_train, num_train_batch)
    global_step = tf.train.create_global_step()
    # tf.train.get_global_step()
    learning_rate = tf.train.exponential_decay(learning_rt, global_step,
                                               5000, 0.9, staircase=True)
    train = tf.train.MomentumOptimizer(learning_rate, momentum=0.9).minimize(
        loss_train, global_step=global_step)
    # train = tf.train.AdamOptimizer(learning_rt).minimize(loss_train)
    tf.summary.scalar('loss_train', loss_train)
    tf.summary.scalar('acc_train', acc_train)
    merged = tf.summary.merge_all()
    with tf.Session(config=tf.ConfigProto()) as sess:
        trainwrite = tf.summary.FileWriter(logpath, sess.graph)
        # initialize variables before launching the queue threads
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        saver = tf.train.Saver()
        run_cycle = 0
        # check for and restore the same checkpoint
        ckpt_to_restore = savepath + '313.ckpt'
        if os.path.exists(ckpt_to_restore + '.index'):
            print('\nStart Restore')
            saver.restore(sess, ckpt_to_restore)
            print('\nEnd Restore')
        print('\nStart Training')
        try:
            while not coord.should_stop():
                while run_cycle < MAX_Cycle:
                    run_cycle += 1
                    # if run_cycle % 10 == 0:
                    #     learning_rt *= 0.6
                    # if run_cycle % 200 == 0:
                    #     learning_rt *= 2.
                    l_tall = 0
                    a_tall = 0
                    l_teall = 0
                    a_teall = 0
                    for train_c in range(TRAIN_CYCLE):
                        _, l_train, a_train = sess.run([train, loss_train, acc_train])
                        l_tall += l_train
                        a_tall += a_train
                        if (train_c + 1) % 100 == 0:
                            print('train_loss:%f' % (l_tall / 100.))
                            print('train_acc:%f' % (a_tall / 100.))
                            l_tall = 0
                            a_tall = 0
                        if (train_c + 1) % 500 == 0:
                            print('Global Step:', sess.run(global_step))
                            result_merged = sess.run(merged)
                            trainwrite.add_summary(result_merged, run_cycle * TRAIN_CYCLE + train_c)
                    for test_c in range(TEST_CYCLE):
                        l_test, a_test = sess.run([loss_test, acc_test])
                        l_teall += l_test
                        a_teall += a_test
                        if (test_c + 1) % TEST_CYCLE == 0:
                            print('------------------')
                            print('test_loss:%f' % (l_teall / TEST_CYCLE))
                            print('test_acc:%f' % (a_teall / TEST_CYCLE))
                            print('------------------')
                            l_teall = 0
                            a_teall = 0
                    saver.save(sess, savepath + str(run_cycle) + '.ckpt')
        except tf.errors.OutOfRangeError:
            print('Done training!!!')
        finally:
            # When done, ask the threads to stop.
            coord.request_stop()
            coord.join(threads)
            sess.close()
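One low-cost way to sidestep the missing-input problem is to add a placeholder-fed copy of the network to this same script before saving, sharing weights through reuse=True, so the checkpoint's meta graph already contains a named input node. A minimal, untested sketch; the names input_images and out_vol_deploy are my own choices, not from the original code:

# Placeholder-fed inference branch sharing the trained 'Model' variables.
# Place after the two inference(...) calls above.
input_images = tf.placeholder(tf.float32, [None, 480, 224, 3], name='input_images')
_, _, logits_deploy = inference(input_images, False, True)  # reuse=True, eval mode
out_deploy = tf.identity(logits_deploy, name='out_vol_deploy')

After training, freezing with output node out_vol_deploy yields a .pb whose input is the placeholder rather than the queue.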
Freeze code
import os, argparse
import tensorflow as tf
from tensorflow.python.framework import graph_util
dir = os.path.dirname(os.path.realpath(__file__))
def freeze_graph(model_folder, output_nodes='y_hat',
                 output_filename='frozen-graph.pb',
                 rename_outputs=None):
    # Load checkpoint
    checkpoint = tf.train.get_checkpoint_state(model_folder)
    input_checkpoint = checkpoint.model_checkpoint_path
    output_graph = output_filename
    # Devices should be cleared to allow TensorFlow to control placement of
    # the graph when loading on different machines
    saver = tf.train.import_meta_graph(input_checkpoint + '.meta',
                                       clear_devices=True)
    graph = tf.get_default_graph()
    onames = output_nodes.split(',')
    # https://stackoverflow.com/a/34399966/4190475
    if rename_outputs is not None:
        nnames = rename_outputs.split(',')
        with graph.as_default():
            for o, n in zip(onames, nnames):
                _out = tf.identity(graph.get_tensor_by_name(o + ':0'), name=n)
            onames = nnames
    input_graph_def = graph.as_graph_def()
    # fix batch norm nodes
    for node in input_graph_def.node:
        if node.op == 'RefSwitch':
            node.op = 'Switch'
            for index in range(len(node.input)):
                if 'moving_' in node.input[index]:
                    node.input[index] = node.input[index] + '/read'
        elif node.op == 'AssignSub':
            node.op = 'Sub'
            if 'use_locking' in node.attr:
                del node.attr['use_locking']
    with tf.Session(graph=graph) as sess:
        saver.restore(sess, input_checkpoint)
        # In production, graph weights no longer need to be updated;
        # graph_util provides a utility to change all variables to constants
        output_graph_def = graph_util.convert_variables_to_constants(
            sess, input_graph_def,
            onames  # unrelated nodes will be discarded
        )
        # Serialize and write to file
        with tf.gfile.GFile(output_graph, "wb") as f:
            f.write(output_graph_def.SerializeToString())
        print("%d ops in the final graph." % len(output_graph_def.node))

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Prune and freeze weights from checkpoints into production models')
    parser.add_argument("--checkpoint_path",
                        default='./regressionDir/',
                        type=str, help="Path to checkpoint files")
    parser.add_argument("--output_nodes",
                        default='Model/dense2/bias_add',
                        type=str, help="Names of output nodes, comma separated")
    parser.add_argument("--output_graph",
                        default='reg.pb',
                        type=str, help="Output graph filename")
    parser.add_argument("--rename_outputs",
                        default='out_vol',
                        type=str, help="Rename output nodes for better \
                        readability in production graph, to be specified in \
                        the same order as output_nodes")
    args = parser.parse_args()
    freeze_graph(args.checkpoint_path, args.output_nodes, args.output_graph, args.rename_outputs)
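Alternatively, the queue tensor can be swapped for a placeholder at freeze time: tf.train.import_meta_graph accepts an input_map, so the script above could remap shuffle_batch:0 to a new placeholder before converting variables to constants. An untested sketch of the changed lines; input_images is my own name, and 'shuffle_batch:0' matches the training graph above:

# Remap the queue output to a placeholder while importing the meta graph,
# so the frozen .pb exposes a real input node instead of shuffle_batch.
input_images = tf.placeholder(tf.float32, [None, 480, 224, 3], name='input_images')
saver = tf.train.import_meta_graph(input_checkpoint + '.meta',
                                   clear_devices=True,
                                   input_map={'shuffle_batch:0': input_images})

Everything downstream of shuffle_batch is then rewired to the placeholder, and the None batch dimension removes the fixed batch-size constraint.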
Test inference code
import tensorflow as tf
import numpy as np
from PIL import Image
import time
gf = tf.GraphDef()
gf.ParseFromString(open('reg.pb', 'rb').read())
print([n.name + '=>' + n.op for n in gf.node])

output_graph_path = './reg.pb'
with tf.Session() as sess:
    output_graph_def = tf.GraphDef()
    with open(output_graph_path, "rb") as f:
        output_graph_def.ParseFromString(f.read())
    _ = tf.import_graph_def(output_graph_def, name="")
    input_img = sess.graph.get_tensor_by_name("shuffle_batch:0")
    print(input_img)
    out_vol = sess.graph.get_tensor_by_name("out_vol:0")
    # the frozen shuffle_batch tensor has the fixed shape [32, 480, 224, 3],
    # so feeding a batch of 48 fails with a shape mismatch
    a = np.random.random([48, 480, 224, 3])
    for x in range(100):
        ntime1 = time.time()
        vol = sess.run(out_vol, {input_img: a})
        ntime2 = time.time()
        print(ntime2 - ntime1)
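The same remapping also works at inference time on the already-frozen reg.pb: tf.import_graph_def takes an input_map, which removes the fixed [32, 480, 224, 3] shape of the frozen shuffle_batch tensor. An untested sketch replacing the import above (placeholder name is my own choice):

# Feed arbitrary batch sizes by remapping the frozen queue tensor to a placeholder.
input_images = tf.placeholder(tf.float32, [None, 480, 224, 3], name='input_images')
tf.import_graph_def(output_graph_def, name="",
                    input_map={'shuffle_batch:0': input_images})
out_vol = tf.get_default_graph().get_tensor_by_name('out_vol:0')
vol = sess.run(out_vol, {input_images: np.random.random([48, 480, 224, 3])})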
Related
Decoding tfrecord with tfslim
I use Python 2.7.13 and Tensorflow 1.3.0 on CPU. I want to use DenseNet ( https://github.com/pudae/tensorflow-densenet ) for a regression problem. My data contains 60000 jpeg images with 37 float labels for each image. I saved my data into tfrecords files by:

def Read_Labels(label_path):
    labels_csv = pd.read_csv(label_path)
    labels = np.array(labels_csv)
    return labels[:, 1:]

def load_image(addr):
    # read an image and resize to (224, 224)
    img = cv2.imread(addr)
    img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_CUBIC)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = img.astype(np.float32)
    return img

def Shuffle_images_with_labels(shuffle_data, photo_filenames, labels):
    if shuffle_data:
        c = list(zip(photo_filenames, labels))
        shuffle(c)
        addrs, labels = zip(*c)
        return addrs, labels

def image_to_tfexample_mine(image_data, image_format, height, width, label):
    return tf.train.Example(features=tf.train.Features(feature={
        'image/encoded': bytes_feature(image_data),
        'image/format': bytes_feature(image_format),
        'image/class/label': _float_feature(label),
        'image/height': int64_feature(height),
        'image/width': int64_feature(width),
    }))

def _convert_dataset(split_name, filenames, labels, dataset_dir):
    assert split_name in ['train', 'validation']
    num_per_shard = int(math.ceil(len(filenames) / float(_NUM_SHARDS)))
    with tf.Graph().as_default():
        for shard_id in range(_NUM_SHARDS):
            output_filename = _get_dataset_filename(dataset_path, split_name, shard_id)
            with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
                start_ndx = shard_id * num_per_shard
                end_ndx = min((shard_id + 1) * num_per_shard, len(filenames))
                for i in range(start_ndx, end_ndx):
                    sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
                        i + 1, len(filenames), shard_id))
                    sys.stdout.flush()
                    img = load_image(filenames[i])
                    image_data = tf.compat.as_bytes(img.tostring())
                    label = labels[i]
                    example = image_to_tfexample_mine(image_data, image_format, height, width, label)
                    # Serialize to string and write on the file
                    tfrecord_writer.write(example.SerializeToString())
    sys.stdout.write('\n')
    sys.stdout.flush()

def run(dataset_dir):
    labels = Read_Labels(dataset_dir + '/training_labels.csv')
    photo_filenames = _get_filenames_and_classes(dataset_dir + '/images_training')
    shuffle_data = True
    photo_filenames, labels = Shuffle_images_with_labels(
        shuffle_data, photo_filenames, labels)
    training_filenames = photo_filenames[_NUM_VALIDATION:]
    training_labels = labels[_NUM_VALIDATION:]
    validation_filenames = photo_filenames[:_NUM_VALIDATION]
    validation_labels = labels[:_NUM_VALIDATION]
    _convert_dataset('train', training_filenames, training_labels, dataset_path)
    _convert_dataset('validation', validation_filenames, validation_labels, dataset_path)
    print('\nFinished converting the Flowers dataset!')

And I decode it by:

with tf.Session() as sess:
    feature = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/class/label': tf.FixedLenFeature(
            [37, ], tf.float32, default_value=tf.zeros([37, ], dtype=tf.float32)),
    }
    filename_queue = tf.train.string_input_producer([data_path], num_epochs=1)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(serialized_example, features=feature)
    image = tf.decode_raw(features['image/encoded'], tf.float32)
    print(image.get_shape())
    label = tf.cast(features['image/class/label'], tf.float32)
    image = tf.reshape(image, [224, 224, 3])
    images, labels = tf.train.shuffle_batch([image, label], batch_size=10, capacity=30,
                                            num_threads=1, min_after_dequeue=10)
    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    sess.run(init_op)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    for batch_index in range(6):
        img, lbl = sess.run([images, labels])
        img = img.astype(np.uint8)
        print(img.shape)
        for j in range(6):
            plt.subplot(2, 3, j + 1)
            plt.imshow(img[j, ...])
        plt.show()
    coord.request_stop()
    coord.join(threads)

It's all fine up to this point. But when I use the commands below for decoding the TFRecord files:

reader = tf.TFRecordReader

keys_to_features = {
    'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
    'image/format': tf.FixedLenFeature((), tf.string, default_value='raw'),
    'image/class/label': tf.FixedLenFeature(
        [37, ], tf.float32, default_value=tf.zeros([37, ], dtype=tf.float32)),
}

items_to_handlers = {
    'image': slim.tfexample_decoder.Image('image/encoded'),
    'label': slim.tfexample_decoder.Tensor('image/class/label'),
}

decoder = slim.tfexample_decoder.TFExampleDecoder(
    keys_to_features, items_to_handlers)

I get the following error:

INFO:tensorflow:Error reported to Coordinator: , assertion failed: [Unable to decode bytes as JPEG, PNG, GIF, or BMP] [[Node: case/If_0/decode_image/cond_jpeg/cond_png/cond_gif/Assert_1/Assert = Assert[T=[DT_STRING], summarize=3, _device="/job:localhost/replica:0/task:0/cpu:0"](case/If_0/decode_image/cond_jpeg/cond_png/cond_gif/is_bmp, case/If_0/decode_image/cond_jpeg/cond_png/cond_gif/Assert_1/Assert/data_0)]]
INFO:tensorflow:Caught OutOfRangeError. Stopping Training.
INFO:tensorflow:Finished training! Saving model to disk.

To use DenseNet for my problem, I should fix this error first. Could anybody please help me out of this problem? This code works perfectly for datasets like Flowers, MNIST and CIFAR10, available at https://github.com/pudae/tensorflow-densenet/tree/master/datasets , but does not work for my data.
Thanks to pudae, the problem is solved. I needed to use:

image_data = tf.gfile.FastGFile(filenames[i], 'rb').read()

for loading the data, instead of:

img = load_image(filenames[i])
image_data = tf.compat.as_bytes(img.tostring())

It works perfectly now.
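The underlying rule: raw pixel bytes written with tostring() must be read back with tf.decode_raw, while slim's Image handler expects encoded file bytes. A tiny self-contained check of the encoded-bytes path (assumes some example.jpg exists on disk):

import tensorflow as tf

# Read the encoded JPEG bytes, which is exactly what tf.gfile.FastGFile
# provides and what an image decoder (or slim's Image handler) expects.
jpeg_bytes = tf.gfile.FastGFile('example.jpg', 'rb').read()
decoded = tf.image.decode_jpeg(tf.constant(jpeg_bytes), channels=3)

with tf.Session() as sess:
    print(sess.run(decoded).shape)  # (height, width, 3)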
According to the error, I think the problem is that you are using an image decoder on array data (already-decoded data), because you saved decoded pixel data when creating the TFRecords. As you may have noticed, when you are not using slim you use tf.decode_raw to decode the data. But when you use slim, 'image/format': tf.FixedLenFeature((), tf.string, default_value='raw') is not used, and by default slim will use the image decoder. I believe you are using the code in slim/data, where format_key = 'image/format' is what you need. So, like this:

keys_to_features = {
    'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
    'image/format': tf.FixedLenFeature((), tf.string, default_value='raw'),
    'image/class/label': tf.FixedLenFeature(
        [1], tf.int64, default_value=tf.zeros([1], dtype=tf.int64)),
}

items_to_handlers = {
    'image': tfexample_decoder.Image(
        image_key='image/encoded',
        format_key='image/format'),
    'label': tfexample_decoder.Tensor('image/class/label'),
}

decoder = tfexample_decoder.TFExampleDecoder(
    keys_to_features, items_to_handlers)

But I am not sure this can solve your problem perfectly, because I can't reproduce your work on my machine.
Maybe there is also a problem with the image itself.
Tensorflow, read tfrecord without a graph
I tried to write a well-structured neural network model with Tensorflow, but I met a problem feeding the data from a tfrecord into the graph. The code is below; it hangs at the following function:

images, labels = network.load_tfrecord_data(1)

This function cannot get the features (images) and labels from my data file, .tfrecords. Any idea will be appreciated.

from __future__ import division
from __future__ import print_function

import datetime
import numpy as np
import tensorflow as tf

layers = tf.contrib.layers
losses = tf.contrib.losses
metrics = tf.contrib.metrics

LABELS = 10
WIDTH = 28
HEIGHT = 28
HIDDEN = 100

def read_and_decode_single_example(filename):
    filename_queue = tf.train.string_input_producer([filename], num_epochs=None)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'label': tf.FixedLenFeature([], tf.int64),
            'image': tf.FixedLenFeature([50176], tf.int64)
        })
    label = features['label']
    image = features['image']
    image = tf.reshape(image, [-1, 224, 224, 1])
    label = tf.one_hot(label - 1, 11, dtype=tf.int64)
    return label, image

class Network:
    def __init__(self, logdir, experiment, threads):
        # Construct the graph
        with tf.name_scope("inputs"):
            self.images = tf.placeholder(tf.float32, [None, WIDTH, HEIGHT, 1], name="images")
            self.labels = tf.placeholder(tf.int64, [None], name="labels")
            # self.keep_prob = keep_prob
            self.keep_prob = tf.placeholder(tf.float32, name="keep_prob")
        flattened_images = layers.flatten(self.images)
        hidden_layer = layers.fully_connected(flattened_images, num_outputs=HIDDEN,
                                              activation_fn=tf.nn.relu, scope="hidden_layer")
        output_layer = layers.fully_connected(hidden_layer, num_outputs=LABELS,
                                              activation_fn=None, scope="output_layer")
        loss = losses.sparse_softmax_cross_entropy(labels=self.labels, logits=output_layer, scope="loss")
        self.training = layers.optimize_loss(loss, None, None, tf.train.AdamOptimizer(),
                                             summaries=['loss', 'gradients', 'gradient_norm'],
                                             name='training')
        with tf.name_scope("accuracy"):
            predictions = tf.argmax(output_layer, 1, name="predictions")
            accuracy = metrics.accuracy(predictions, self.labels)
            tf.summary.scalar("training/accuracy", accuracy)
        self.accuracy = metrics.accuracy(predictions, self.labels)
        with tf.name_scope("confusion_matrix"):
            confusion_matrix = metrics.confusion_matrix(predictions, self.labels,
                                                        weights=tf.not_equal(predictions, self.labels),
                                                        dtype=tf.float32)
            confusion_image = tf.reshape(confusion_matrix, [1, LABELS, LABELS, 1])
        # Summaries
        self.summaries = {'training': tf.summary.merge_all()}
        for dataset in ["dev", "test"]:
            self.summaries[dataset] = tf.summary.scalar(dataset + "/loss", loss)
            self.summaries[dataset] = tf.summary.scalar(dataset + "/accuracy", accuracy)
            self.summaries[dataset] = tf.summary.image(dataset + "/confusion_matrix", confusion_image)
        # Create the session
        self.session = tf.Session(config=tf.ConfigProto(inter_op_parallelism_threads=threads,
                                                        intra_op_parallelism_threads=threads))
        self.session.run(tf.global_variables_initializer())
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
        self.summary_writer = tf.summary.FileWriter("{}/{}-{}".format(logdir, timestamp, experiment),
                                                    graph=self.session.graph, flush_secs=10)
        self.steps = 0

    def train(self, images, labels, keep_prob):
        self.steps += 1
        feed_dict = {self.images: self.session.run(images),
                     self.labels: self.session.run(labels),
                     self.keep_prob: keep_prob}
        if self.steps == 1:
            metadata = tf.RunMetadata()
            self.session.run(self.training, feed_dict,
                             options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
                             run_metadata=metadata)
            self.summary_writer.add_run_metadata(metadata, 'step1')
        elif self.steps % 100 == 0:
            _, summary = self.session.run([self.training, self.summaries['training']], feed_dict)
            self.summary_writer.add_summary(summary, self.steps)
        else:
            self.session.run(self.training, feed_dict)

    def evaluate(self, dataset, images, labels):
        feed_dict = {self.images: images, self.labels: labels, self.keep_prob: 1}
        summary = self.summaries[dataset].eval({self.images: images, self.labels: labels,
                                                self.keep_prob: 1}, self.session)
        self.summary_writer.add_summary(summary, self.steps)

    def load_tfrecord_data(self, training):
        training = training
        if training:
            label, image = read_and_decode_single_example("mhad_Op_train.tfrecords")
            # print(self.session.run(image))
        else:
            label, image = read_and_decode_single_example("mhad_Op_test.tfrecords")
        # image = tf.cast(image, tf.float32) / 255.
        images_batch, labels_batch = tf.train.shuffle_batch(
            [image, label], batch_size=50, num_threads=2,
            capacity=80, min_after_dequeue=30)
        return images_batch, labels_batch

if __name__ == '__main__':
    # Fix random seed
    np.random.seed(42)
    tf.set_random_seed(42)
    # Parse arguments
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', default=256, type=int, help='Batch size.')
    parser.add_argument('--epochs', default=50, type=int, help='Number of epochs.')
    parser.add_argument('--logdir', default="logs", type=str, help='Logdir name.')
    parser.add_argument('--exp', default="mnist-final-confusion_matrix_customized_loss",
                        type=str, help='Experiment name.')
    parser.add_argument('--threads', default=1, type=int, help='Maximum number of threads to use.')
    args = parser.parse_args()
    # Load the data
    keep_prob = 1
    # Construct the network
    network = Network(logdir=args.logdir, experiment=args.exp, threads=args.threads)
    # Train
    for i in range(args.epochs):
        images, labels = network.load_tfrecord_data(1)
        network.train(images, labels, keep_prob)
        print('current epoch', i)
You need to start the queues before using images and labels in your model:

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    images, labels = network.load_tfrecord_data(1)
    ...
    coord.request_stop()
    coord.join(threads)

Check this tutorial for a full example.
make tf.Estimator use default graph
I am trying to make use of the tensorflow protobuffer feeding pipeline. The easiest way seemed to be to use tf.estimator.Estimator with tf.contrib.data.TFRecordDataset. However, I came across the issue that it creates a new Graph in spite of being launched within with g.as_default(). In the following code I see that both the model tensors and the tensors returned by the TFRecordDataset are on the same graph before I feed them to the Estimator, but they end up on different graphs within the Estimator. Any ideas how to put them on the same graph?

# coding: utf-8
import sys
import tensorflow as tf
from keras.applications.inception_v3 import InceptionV3
import numpy as np

final_activation = 'linear'

g = tf.Graph()
with g.as_default():
    model = InceptionV3(weights='imagenet', include_top=True,
                        input_tensor=None, input_shape=None,
                        pooling=None, classes=1000)

    def model_fn(mode, features, labels, params):
        optimizer = params["optimizer"]
        opt_params = params.get("opt_params", {})
        predictions = model(features)
        if (mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL):
            loss = tf.contrib.keras.backend.categorical_crossentropy(predictions, labels)
            # loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logyhat)
        else:
            loss = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            optimizer = getattr(tf.train, optimizer)
            train_op = optimizer(opt_params).minimize(loss)
        else:
            train_op = None
        return tf.estimator.EstimatorSpec(
            mode=mode, predictions=predictions,
            loss=loss, train_op=train_op)

    def parser(record):
        keys_to_features = {
            'height': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64),
            'image_raw': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64)
        }
        features = tf.parse_single_example(record, features=keys_to_features)
        # Convert from a scalar string tensor to a uint8 tensor
        image = tf.decode_raw(features['image_raw'], tf.float32)
        height = tf.cast(features['height'], tf.int32)
        width = tf.cast(features['width'], tf.int32)
        image_shape = tf.stack([height, width, 3])
        image = tf.reshape(image, image_shape)
        label = tf.cast(features["label"], tf.int32)
        return image, label

    def get_dataset_inp_fn(filenames, epochs=20):
        def dataset_input_fn():
            dataset = tf.contrib.data.TFRecordDataset(filenames)
            # Use `Dataset.map()` to build a pair of a feature dictionary
            # and a label tensor for each example.
            dataset = dataset.map(parser)
            dataset = dataset.shuffle(buffer_size=10000)
            dataset = dataset.batch(32)
            dataset = dataset.repeat(epochs)
            iterator = dataset.make_one_shot_iterator()
            features, labels = iterator.get_next()
            return features, labels
        return dataset_input_fn

    inpfun = get_dataset_inp_fn(["mydataset.tfrecords"], epochs=20)
    x, y = inpfun()
    print("X", x.graph)
    print("DEFAULT", g)
    print("MODEL", model.input.graph)
    # everything is on the same graph
    if not x.graph is tf.get_default_graph():
        raise ValueError()

    with tf.Session(graph=g) as sess:
        est = tf.estimator.Estimator(
            model_fn, model_dir=None, config=None,
            params={"optimizer": "AdamOptimizer", "opt_params": {}}
        )
        est.train(inpfun)
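For what it's worth, tf.estimator.Estimator always constructs a fresh tf.Graph internally and calls model_fn and input_fn inside it, so tensors created beforehand (like the InceptionV3 model above) land on a different graph. A hedged, untested sketch of the usual workaround, building the model inside model_fn; it assumes one-hot float labels, which the parser above does not produce:

import tensorflow as tf
from keras.applications.inception_v3 import InceptionV3

def model_fn(features, labels, mode):
    # Building the model here keeps its tensors on the Estimator's own graph.
    model = InceptionV3(weights='imagenet', include_top=True, classes=1000)
    predictions = model(features)  # softmax probabilities
    loss = None
    train_op = None
    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
        # plain cross-entropy on one-hot labels and probability outputs
        loss = tf.reduce_mean(-tf.reduce_sum(labels * tf.log(predictions + 1e-8), axis=1))
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = tf.train.AdamOptimizer().minimize(
            loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions,
                                      loss=loss, train_op=train_op)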
Dataset input from bmp images only 50% accurate
I've created this graph to try to: import BMP files and generate labels based on their filenames (L/R), train a network to distinguish between the left and right eye, and evaluate the network. I'm using the new framework and getting it all in as a dataset. The code runs, but I only get 50% accuracy (no learning is happening). Can anyone check that the graph is right and that it's just my network I need to fix?

""" Routine for processing Eye Image dataset
determines left/right eye
Using Tensorflow API v1.3
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import fnmatch
import tensorflow as tf
from six.moves import xrange  # pylint: disable=redefined-builtin
import nnLayers as nnLayer

IMAGE_SIZE = 460
SCALE_SIZE = 100
NUM_CLASSES = 2
IMAGE_DEPTH = 3

FLAGS = tf.app.flags.FLAGS

# Basic model parameters.
tf.app.flags.DEFINE_integer('batch_size', 200,
                            """Number of images to process in a batch.""")
tf.app.flags.DEFINE_integer('num_epochs', 1001,
                            """Number of images to process in a batch.""")
tf.app.flags.DEFINE_string('train_directory', './eyeImages',
                           """directory of images to process.""")
tf.app.flags.DEFINE_string('test_directory', './eyeTest',
                           """directory of images to process.""")
tf.app.flags.DEFINE_string('log_dir', './logs',
                           """logging directory""")

def _parse_function(filename, label):
    """Takes filenames and labels and returns one hot labels and image values"""
    # read the file
    image_string = tf.read_file(filename)
    # decode BMP file
    image_decoded = tf.image.decode_bmp(image_string)
    # resize accordingly
    image = tf.image.resize_images(image_decoded, [SCALE_SIZE, SCALE_SIZE])
    # convert label to one hot
    one_hot = tf.one_hot(label, NUM_CLASSES)
    return image, one_hot

def inference(image):
    # shape image for convolution
    with tf.name_scope('input_reshape'):
        # infer number of images, last dimension is features
        x_image = tf.reshape(image, [-1, SCALE_SIZE, SCALE_SIZE, IMAGE_DEPTH])
        tf.summary.image('input_images', x_image)
    # neural net layers
    # 100x100x3 -> 50x50x32
    h_pool1 = nnLayer.conv_layer(x_image, IMAGE_DEPTH, 5, 32, 'hiddenLayer1', act=tf.nn.relu)
    # 50x50x32 -> 25x25x64
    h_pool2 = nnLayer.conv_layer(h_pool1, 32, 5, 64, 'hiddenLayer2', act=tf.nn.relu)
    # 25x25x64 -> 1024x2
    h_fc1 = nnLayer.fc_layer(h_pool2, 64, 25, 1024, 'fcLayer1', act=tf.nn.relu)
    # 1024x2 -> 1x2
    with tf.name_scope('final-layer'):
        with tf.name_scope('weights'):
            W_fc2 = nnLayer.weight_variable([1024, NUM_CLASSES])
        with tf.name_scope('biases'):
            b_fc2 = nnLayer.bias_variable([NUM_CLASSES])
        y_conv = tf.matmul(h_fc1, W_fc2) + b_fc2
    return y_conv

def folderParser(folder):
    """output BMP file names in directory and label based on file name"""
    # create list of filenames in directory
    files = os.listdir(folder)
    # filter for BMP files
    bmpfiles = fnmatch.filter(files, '*.bmp')
    # create empty lists
    labels = []
    fullNames = []
    # get the length of the filename and determine left/right label
    for i in range(len(bmpfiles)):
        length = len(bmpfiles[i])
        fullNames.append(folder + '/' + bmpfiles[i])
        if (bmpfiles[i][length - 17]) == 'L':
            labels.append(1)
        else:
            labels.append(0)
    return fullNames, labels

def main(argv=None):  # pylint: disable=unused-argument
    # delete the log files if present
    # if tf.gfile.Exists(FLAGS.log_dir):
    #     tf.gfile.DeleteRecursively(FLAGS.log_dir)
    # tf.gfile.MakeDirs(FLAGS.log_dir)

    # get file names and labels
    trainNames, trainLabels = folderParser(FLAGS.train_directory)
    testNames, testLabels = folderParser(FLAGS.test_directory)
    # create a dataset of the file names and labels
    tr_data = tf.contrib.data.Dataset.from_tensor_slices((trainNames, trainLabels))
    ts_data = tf.contrib.data.Dataset.from_tensor_slices((testNames, testLabels))
    # map the data set from file names to images
    tr_data = tr_data.map(_parse_function)
    ts_data = ts_data.map(_parse_function)
    # shuffle the images
    tr_data = tr_data.shuffle(FLAGS.batch_size * 2)
    ts_data = ts_data.shuffle(FLAGS.batch_size * 2)
    # create batches
    tr_data = tr_data.batch(FLAGS.batch_size)
    ts_data = ts_data.batch(FLAGS.batch_size)
    # create handle for datasets
    handle = tf.placeholder(tf.string, shape=[])
    iterator = tf.contrib.data.Iterator.from_string_handle(handle, tr_data.output_types,
                                                           tr_data.output_shapes)
    next_element = iterator.get_next()
    # setup iterator
    training_iterator = tr_data.make_initializable_iterator()
    validation_iterator = ts_data.make_initializable_iterator()
    # retrieve next batch
    features, labels = iterator.get_next()
    # run network
    y_conv = inference(features)
    # determine softmax and loss function
    with tf.variable_scope('softmax_linear') as scope:
        diff = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=y_conv)
        with tf.name_scope('total'):
            cross_entropy = tf.reduce_mean(diff)
    tf.summary.scalar('cross_entropy', cross_entropy)
    # run gradient descent
    with tf.name_scope('train'):
        training_op = tf.train.GradientDescentOptimizer(1e-3).minimize(cross_entropy)
    # identify correct predictions
    with tf.name_scope('correct_prediction'):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(labels, 1))
    # find the accuracy of the model
    with tf.name_scope('accuracy'):
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', accuracy)

    with tf.Session() as sess:
        # initialization of the variables
        training_handle = sess.run(training_iterator.string_handle())
        validation_handle = sess.run(validation_iterator.string_handle())
        sess.run(tf.global_variables_initializer())
        # merge all the summaries and write test summaries
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train', sess.graph)
        test_writer = tf.summary.FileWriter(FLAGS.log_dir + '/test')
        # run through epochs
        for epoch in range(FLAGS.num_epochs):
            # initialize the training set for training epoch
            sess.run(training_iterator.initializer)
            if epoch % 2 == 0:
                # initialize validation set
                sess.run(validation_iterator.initializer)
                # test
                summary, acc = sess.run([merged, accuracy], feed_dict={handle: validation_handle})
                train_writer.add_summary(summary, epoch)  # write to test file
                print('step %s, accuracy %s' % (epoch, acc))
            else:
                # train
                sess.run(training_op, feed_dict={handle: training_handle})
        # close the log files
        train_writer.close()
        test_writer.close()

if __name__ == '__main__':
    tf.app.run()

Aaron
The answer was image standardization:

image_std = tf.image.per_image_standardization(image_resized)

Without the image standardization the neurons were becoming saturated. It improved the outcome straight away. Thanks.
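In the question's pipeline, the standardization would slot into _parse_function; a sketch reusing the names from the code above:

def _parse_function(filename, label):
    image_string = tf.read_file(filename)
    image_decoded = tf.image.decode_bmp(image_string)
    image_resized = tf.image.resize_images(image_decoded, [SCALE_SIZE, SCALE_SIZE])
    # zero mean / unit variance keeps the first layers out of saturation
    image = tf.image.per_image_standardization(image_resized)
    one_hot = tf.one_hot(label, NUM_CLASSES)
    return image, one_hot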
No variable to save error in Tensorflow
I am trying to save the model and then reuse it for classifying my images, but unfortunately I am getting errors when restoring the model that I have saved. The code in which the model is created:

# Deep Learning
# =============
# Assignment 4
# ------------

# In[25]:

# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range

# In[37]:

pickle_file = 'notMNIST.pickle'

with open(pickle_file, 'rb') as f:
    save = pickle.load(f)
    train_dataset = save['train_dataset']
    train_labels = save['train_labels']
    valid_dataset = save['valid_dataset']
    valid_labels = save['valid_labels']
    test_dataset = save['test_dataset']
    test_labels = save['test_labels']
    del save  # hint to help gc free up memory
    print('Training set', train_dataset.shape, train_labels.shape)
    print('Validation set', valid_dataset.shape, valid_labels.shape)
    print('Test set', test_dataset.shape, test_labels.shape)
    print(test_labels)

# Reformat into a TensorFlow-friendly shape:
# - convolutions need the image data formatted as a cube (width by height by #channels)
# - labels as float 1-hot encodings.

# In[38]:

image_size = 28
num_labels = 10
num_channels = 1  # grayscale

import numpy as np

def reformat(dataset, labels):
    dataset = dataset.reshape(
        (-1, image_size, image_size, num_channels)).astype(np.float32)
    # print(np.arange(num_labels))
    labels = (np.arange(num_labels) == labels[:, None]).astype(np.float32)
    # print(labels[0, :])
    print(labels[0])
    return dataset, labels

train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
# print(labels[0])

# In[39]:

def accuracy(predictions, labels):
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
            / predictions.shape[0])

# Let's build a small network with two convolutional layers, followed by one
# fully connected layer. Convolutional networks are more expensive
# computationally, so we'll limit its depth and number of fully connected nodes.

# In[47]:

batch_size = 16
patch_size = 5
depth = 16
num_hidden = 64

graph = tf.Graph()

with graph.as_default():
    # Input data.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)

    # Variables.
    layer1_weights = tf.Variable(tf.truncated_normal(
        [patch_size, patch_size, num_channels, depth], stddev=0.1), name="layer1_weights")
    layer1_biases = tf.Variable(tf.zeros([depth]), name="layer1_biases")
    layer2_weights = tf.Variable(tf.truncated_normal(
        [patch_size, patch_size, depth, depth], stddev=0.1), name="layer2_weights")
    layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]), name="layer2_biases")
    layer3_weights = tf.Variable(tf.truncated_normal(
        [image_size // 4 * image_size // 4 * depth, num_hidden], stddev=0.1), name="layer3_biases")
    layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]), name="layer3_biases")
    layer4_weights = tf.Variable(tf.truncated_normal(
        [num_hidden, num_labels], stddev=0.1), name="layer4_weights")
    layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]), name="layer4_biases")

    # Model.
    def model(data):
        conv = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1], padding='SAME')
        hidden = tf.nn.relu(conv + layer1_biases)
        conv = tf.nn.conv2d(hidden, layer2_weights, [1, 2, 2, 1], padding='SAME')
        hidden = tf.nn.relu(conv + layer2_biases)
        shape = hidden.get_shape().as_list()
        reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
        hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
        return tf.matmul(hidden, layer4_weights) + layer4_biases

    # Training computation.
    logits = model(tf_train_dataset)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))

    # Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
    test_prediction = tf.nn.softmax(model(tf_test_dataset))

# In[48]:

num_steps = 1001
# saver = tf.train.Saver()

with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print('Initialized')
    for step in range(num_steps):
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
        _, l, predictions = session.run(
            [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if (step % 50 == 0):
            print('Minibatch loss at step %d: %f' % (step, l))
            print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
            print('Validation accuracy: %.1f%%' % accuracy(
                valid_prediction.eval(), valid_labels))
    print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))
    save_path = tf.train.Saver().save(session, "/tmp/model.ckpt")
    print("Model saved in file: %s" % save_path)

Everything works fine and the model is stored in the respective folder. I have created one more python file where I have tried restoring the model, but I am getting an error there:

# In[1]:

from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range

# In[3]:

image_size = 28
num_labels = 10
num_channels = 1  # grayscale

import numpy as np

# In[4]:

def accuracy(predictions, labels):
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
            / predictions.shape[0])

# In[8]:

batch_size = 16
patch_size = 5
depth = 16
num_hidden = 64

graph = tf.Graph()

with graph.as_default():
    '''# Input data.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)'''

    # Variables.
    layer1_weights = tf.Variable(tf.truncated_normal(
        [patch_size, patch_size, num_channels, depth], stddev=0.1), name="layer1_weights")
    layer1_biases = tf.Variable(tf.zeros([depth]), name="layer1_biases")
    layer2_weights = tf.Variable(tf.truncated_normal(
        [patch_size, patch_size, depth, depth], stddev=0.1), name="layer2_weights")
    layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]), name="layer2_biases")
    layer3_weights = tf.Variable(tf.truncated_normal(
        [image_size // 4 * image_size // 4 * depth, num_hidden], stddev=0.1), name="layer3_biases")
    layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]), name="layer3_biases")
    layer4_weights = tf.Variable(tf.truncated_normal(
        [num_hidden, num_labels], stddev=0.1), name="layer4_weights")
    layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]), name="layer4_biases")

    # Model.
    def model(data):
        conv = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1], padding='SAME')
        hidden = tf.nn.relu(conv + layer1_biases)
        conv = tf.nn.conv2d(hidden, layer2_weights, [1, 2, 2, 1], padding='SAME')
        hidden = tf.nn.relu(conv + layer2_biases)
        shape = hidden.get_shape().as_list()
        reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
        hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
        return tf.matmul(hidden, layer4_weights) + layer4_biases

    '''# Training computation.
    logits = model(tf_train_dataset)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))

    # Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)'''

    # Predictions for the training, validation, and test data.
    # train_prediction = tf.nn.softmax(logits)
    # valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
    # test_prediction = tf.nn.softmax(model(tf_test_dataset))

# In[17]:

# saver = tf.train.Saver()

with tf.Session() as sess:
    # Restore variables from disk.
    tf.train.Saver().restore(sess, "/tmp/model.ckpt")
    print("Model restored.")
    # Do some work with the model

The error that I am getting is: No variables to save. Any help would be appreciated.
The error here is quite subtle. In In[8] you create a tf.Graph called graph and set it as default for the with graph.as_default(): block. This means that all of the variables are created in graph, and if you print graph.all_variables() you should see a list of your variables.

However, you exit the with block before creating (i) the tf.Session, and (ii) the tf.train.Saver. This means that the session and saver are created in a different graph (the global default tf.Graph that is used when you don't explicitly create one and set it as default), which doesn't contain any variables, or any nodes at all.

There are at least two solutions:

As Yaroslav suggests, you can write your program without using the with graph.as_default(): block, which avoids the confusion with multiple graphs. However, this can lead to name collisions between different cells in your IPython notebook, which is awkward when using the tf.train.Saver, since it uses the name property of a tf.Variable as the key in the checkpoint file.

You can create the saver inside the with graph.as_default(): block, and create the tf.Session with an explicit graph, as follows:

with graph.as_default():
    # [Variable and model creation goes here.]
    saver = tf.train.Saver()  # Gets all variables in `graph`.

with tf.Session(graph=graph) as sess:
    saver.restore(sess, "/tmp/model.ckpt")
    # Do some work with the model....

Alternatively, you can create the tf.Session inside the with graph.as_default(): block, in which case it will use graph for all of its operations.
You are creating a new session in In[17], which wipes your variables. Also, you don't need to use with blocks if you only have one default graph and one default session; instead you can do something like this:

sess = tf.InteractiveSession()
layer1_weights = tf.Variable(tf.truncated_normal(
    [patch_size, patch_size, num_channels, depth], stddev=0.1), name="layer1_weights")
tf.train.Saver().restore(sess, "/tmp/model.ckpt")