RandomShuffleQueue is closed when reading TFRecords - tensorflow

I have converted a CSV file ("test03.txt") to a TFRecords-formatted file ("test03.tfrecords"), but when I then read the TFRecords file back in and try to use tf.train.shuffle_batch, I get the error message
RandomShuffleQueue '_2_shuffle_batch_1/random_shuffle_queue' is closed and has insufficient elements (requested 10, current size 0)
The CSV file is
1,0
2,0
3,0
4,0
5,1
6,0
7,1
8,1
9,1
10,1
which I convert to a TFRecords file using
import pandas
import tensorflow as tf

csv = pandas.read_csv(r"test03.txt", header=None).values
with tf.python_io.TFRecordWriter("test03.tfrecords") as writer:
    for row in csv:
        features, label = row[:-1], row[-1]
        example = tf.train.Example()
        example.features.feature["features"].float_list.value.extend(features)
        example.features.feature["label"].int64_list.value.append(label)
        writer.write(example.SerializeToString())
But I get the above error message when I run the following code:
import tensorflow as tf

batch_size = 10
with tf.Session() as sess:
    filename_queue = tf.train.string_input_producer(["test03.tfrecords"], num_epochs=1)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    feature_dict = {'features': tf.FixedLenFeature([], tf.int64),
                    'label': tf.FixedLenFeature([], tf.int64)}
    featuresLabel = tf.parse_single_example(serialized_example, features=feature_dict)
    xdata = tf.cast(featuresLabel['features'], tf.int32)
    label = tf.cast(featuresLabel['label'], tf.int32)
    min_after_dequeue = 1
    capacity = min_after_dequeue + 3 * batch_size
    batch_of_xs, batch_of_labels = tf.train.shuffle_batch(
        [xdata, label], batch_size=batch_size, capacity=capacity,
        num_threads=1, min_after_dequeue=min_after_dequeue)
    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    sess.run(init_op)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    single_batch_xs, single_batch_ys = sess.run([batch_of_xs, batch_of_labels])

Your issue is located in the feature_dict. In your initial example you perform the conversion to TFRecords as follows:
example.features.feature["features"].float_list.value.extend(features)
example.features.feature["label"].int64_list.value.append(label)
Hence your features are encoded as floats and your labels as int64. However, when you read them back you declare both as int64:
feature_dict = {'features': tf.FixedLenFeature([], tf.int64),'label': tf.FixedLenFeature([], tf.int64)}
The fix is simply to match the feature_dict to your initial encoding, i.e. change the line above to:
feature_dict = {'features': tf.FixedLenFeature([], tf.float32),'label': tf.FixedLenFeature([], tf.int64)}
This solves the issue for me (along with adding a print of single_batch_xs and single_batch_ys at the end).
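For completeness, a minimal sketch of the corrected reading pipeline, assuming the same file as above. The only substantive change from the question's code is the float32 declaration; the final print and the coordinator shutdown are illustrative additions:

import tensorflow as tf

batch_size = 10
with tf.Session() as sess:
    filename_queue = tf.train.string_input_producer(["test03.tfrecords"], num_epochs=1)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    # 'features' is declared float32 to match the float_list used at write time.
    feature_dict = {'features': tf.FixedLenFeature([], tf.float32),
                    'label': tf.FixedLenFeature([], tf.int64)}
    featuresLabel = tf.parse_single_example(serialized_example, features=feature_dict)
    xdata = tf.cast(featuresLabel['features'], tf.int32)
    label = tf.cast(featuresLabel['label'], tf.int32)
    batch_of_xs, batch_of_labels = tf.train.shuffle_batch(
        [xdata, label], batch_size=batch_size, capacity=1 + 3 * batch_size,
        num_threads=1, min_after_dequeue=1)
    sess.run(tf.group(tf.global_variables_initializer(),
                      tf.local_variables_initializer()))
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    single_batch_xs, single_batch_ys = sess.run([batch_of_xs, batch_of_labels])
    print(single_batch_xs, single_batch_ys)
    coord.request_stop()
    coord.join(threads)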

Related

make tf.Estimator use default graph

I am trying to make use of a TensorFlow protobuf feeding pipeline. The easiest way seemed to be to use tf.estimator.Estimator with tf.contrib.data.TFRecordDataset. However, I came across the issue that it creates a new Graph in spite of being launched within with g.as_default(). In the following code I see that both the model tensors and the tensors returned by the TFRecordDataset are on the same graph before I feed them to the Estimator, but they end up on different graphs within the Estimator. Any ideas how to put them on the same graph?
# coding: utf-8
import sys
import tensorflow as tf
from keras.applications.inception_v3 import InceptionV3
import numpy as np

final_activation = 'linear'

g = tf.Graph()
with g.as_default():
    model = InceptionV3(weights='imagenet',
                        include_top=True,
                        input_tensor=None,
                        input_shape=None,
                        pooling=None,
                        classes=1000)

    def model_fn(mode, features, labels, params):
        optimizer = params["optimizer"]
        opt_params = params.get("opt_params", {})
        predictions = model(features)
        if (mode == tf.estimator.ModeKeys.TRAIN or
                mode == tf.estimator.ModeKeys.EVAL):
            loss = tf.contrib.keras.backend.categorical_crossentropy(predictions, labels)
            # loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logyhat)
        else:
            loss = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            optimizer = getattr(tf.train, optimizer)
            train_op = optimizer(opt_params).minimize(loss)
        else:
            train_op = None
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=loss,
            train_op=train_op)

    def parser(record):
        keys_to_features = {
            'height': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64),
            'image_raw': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64)
        }
        features = tf.parse_single_example(
            record,
            features=keys_to_features)
        # Convert from a scalar string tensor to a float32 tensor
        image = tf.decode_raw(features['image_raw'], tf.float32)
        height = tf.cast(features['height'], tf.int32)
        width = tf.cast(features['width'], tf.int32)
        image_shape = tf.stack([height, width, 3])
        image = tf.reshape(image, image_shape)
        label = tf.cast(features["label"], tf.int32)
        return image, label

    def get_dataset_inp_fn(filenames, epochs=20):
        def dataset_input_fn():
            dataset = tf.contrib.data.TFRecordDataset(filenames)
            # Use `Dataset.map()` to build a pair of a feature dictionary and a label
            # tensor for each example.
            dataset = dataset.map(parser)
            dataset = dataset.shuffle(buffer_size=10000)
            dataset = dataset.batch(32)
            dataset = dataset.repeat(epochs)
            iterator = dataset.make_one_shot_iterator()
            features, labels = iterator.get_next()
            return features, labels
        return dataset_input_fn

    inpfun = get_dataset_inp_fn(["mydataset.tfrecords"], epochs=20)
    x, y = inpfun()
    print("X", x.graph)
    print("DEFAULT", g)
    print("MODEL", model.input.graph)
    # everything is on the same graph
    if not x.graph is tf.get_default_graph():
        raise ValueError()

    with tf.Session(graph=g) as sess:
        est = tf.estimator.Estimator(
            model_fn,
            model_dir=None,
            config=None,
            params={"optimizer": "AdamOptimizer",
                    "opt_params": {}}
        )
        est.train(inpfun)
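A hedged note, not from the original thread: tf.estimator.Estimator builds its own fresh Graph inside train() and invokes both input_fn and model_fn there, so a model constructed outside model_fn (like the InceptionV3 above) inevitably lives on a different graph. A minimal sketch of the usual workaround is to construct the model inside model_fn:

def model_fn(features, labels, mode, params):
    # Build the network here: Estimator.train() calls model_fn inside the
    # graph it creates internally, so the model's tensors land on that graph.
    model = InceptionV3(weights='imagenet', include_top=True, classes=1000)
    predictions = model(features)
    loss = None
    train_op = None
    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
        loss = tf.reduce_mean(
            tf.contrib.keras.backend.categorical_crossentropy(predictions, labels))
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer_cls = getattr(tf.train, params["optimizer"])
        optimizer = optimizer_cls(**params.get("opt_params", {}))
        train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(
        mode=mode, predictions=predictions, loss=loss, train_op=train_op)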

Tensorflow While Body Not Executing

I have a FIFO queue reading from a tfrecords file in TensorFlow. Each record consists of an image and its annotation, that is, a set of features. I was trying to skip some images, that is, not feed them into the graph, according to some feature value. Therefore, I thought the best approach was to use a while loop that tests the value of the specified feature and decides whether to proceed or not.
Kindly look at the following code:
import tensorflow as tf
import numpy as np

num_epoch = 100
tfrecords_filename_seq = ["C:/Users/user/PycharmProjects/AffectiveComputing/P16_db.tfrecords"]
filename_queue = tf.train.string_input_producer(tfrecords_filename_seq, num_epochs=num_epoch, shuffle=False, name='queue')
reader = tf.TFRecordReader()

current_image_confidence = tf.constant(0.0, dtype=tf.float32)

def body(i):
    key, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        # Defaults are not specified since both keys are required.
        features={
            'height': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64),
            'image_raw': tf.FixedLenFeature([], tf.string),
            'annotation_raw': tf.FixedLenFeature([], tf.string)
        })
    # This is how we create one example, that is, extract one example from the database.
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    # The image is reshaped since it is flattened when stored in binary format;
    # the height and the width are needed to restore the original image.
    height = tf.cast(features['height'], tf.int32)
    width = tf.cast(features['width'], tf.int32)
    image = tf.reshape(image, [height, width, 3])
    annotation = tf.cast(features['annotation_raw'], tf.string)
    t1 = tf.string_split([annotation], delimiter=',')
    t2 = tf.reshape(t1.values, [1, -1])
    t3 = tf.string_to_number(t2, out_type=tf.float32)
    t_ = tf.slice(t3, begin=[0, 3], size=[1, 1])
    # Note that t_ holds a value of 1.0 or 0.0, i.e. the particular feature I'm interested in.
    t_ = tf.Print(t_, data=[tf.shape(t_)], message='....')
    z = tf.cond(t_[0][0] < 1.0, lambda: tf.add(i, 0.0), lambda: tf.add(i, 1.0))
    return z

cond = lambda i: tf.equal(i, tf.constant(0.0, dtype=tf.float32))
loop = tf.while_loop(cond, body, [current_image_confidence])

init_op = tf.group(tf.local_variables_initializer(),
                   tf.global_variables_initializer())

with tf.Session() as sess:
    sess.run(init_op)
    sess.run(loop)
Finally, when I run the above code, it seems that the body is not executing and the program is stuck in an infinite loop; the tf.Print(...) in the body is never executed.
Why is this the case?
Any help is much appreciated!!
Your program is getting stuck because you're not starting the queue runners. Run tf.train.start_queue_runners() before running your loop op.
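A minimal sketch of the fix, assuming the graph built above. The coordinator is not mentioned in the original answer but is the usual shutdown pattern:

with tf.Session() as sess:
    sess.run(init_op)
    # Without these threads, reader.read() blocks forever on the empty
    # filename queue, which is why the loop body never runs.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    sess.run(loop)
    coord.request_stop()
    coord.join(threads)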

Multi Layer Tiff labelled dataset conversion to format that tensor flow can use for model optimisation

I'm a Python and TensorFlow newbie, and was wondering...
How best to convert a labelled dataset of multi-layer TIFFs into a format that TensorFlow can use for model optimisation / fine tuning?
I currently have this code that puts each layer of a folder of multi-TIFFs into a 3D array, but I need to preserve the label or filename of the multi-TIFFs. I have seen some TensorFlow scripts to convert to TFRecords; however, I'm not sure if these preserve the file name. How best would you go about this? It will be quite a big dataset.
Any help much appreciated
import os               # For file handling
from PIL import Image   # Pillow image processing library
import numpy

MultiTiffs = "MultiTiffs/"

for filename in os.listdir(MultiTiffs):
    ## Imports Multi-Layer TIFF into 3D Numpy Array.
    img = Image.open(MultiTiffs + filename)
    imgArray = numpy.zeros((img.n_frames, img.size[1], img.size[0]), numpy.uint8)
    try:
        # Copy each frame into the array; img.n_frames covers the whole file.
        for frame in range(2, img.n_frames):
            img.seek(frame)
            imgArray[frame, :, :] = img
    except EOFError:
        # Rewind when seeking past the last frame.
        img.seek(0)
    print(imgArray.shape)  # imgArray is now 3D
    print(imgArray.size)
best wishes
TWP
Okay, so I figured it out using the thread from Daniil's blog:
http://warmspringwinds.github.io/tensorflow/tf-slim/2016/12/21/tfrecords-guide/
However, my current implementation creates multiple TFRecords, and I think it needs to be a single TFRecord, so I'm trying to figure out how to make it a single TFRecord. How do I do that? (See the sketch after the writer script below.)
Then I can validate it using a TFRecord reading script to read it back and check it is in the right format for TensorFlow. I currently get errors using the reading script.
from PIL import Image
import numpy as np
import tensorflow as tf
import os

def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

path = 'test/'
output = 'output/'

fileList = [os.path.join(dirpath, f) for dirpath, dirnames, files in os.walk(path) for f in files if f.endswith('.tif')]
print(fileList)

for filename in fileList:
    basename = os.path.basename(filename)
    file_name = basename[:-4]
    print("processing file: ", filename)
    print(file_name)
    if not os.path.exists(output):
        os.mkdir(output)
    writer = tf.python_io.TFRecordWriter(output + file_name + '.tfrecord')
    img = Image.open(filename)
    imgArray = np.zeros((img.n_frames, img.size[1], img.size[0]), np.uint8)
    ## Imports Multi-Layer file into 3D Numpy Array.
    try:
        for frame in range(0, img.n_frames):
            img.seek(frame)
            imgArray[frame, :, :] = img
    except EOFError:
        img.seek(0)
    print("print img size:", img.size)
    print("print image shape: ", imgArray.shape)
    print("print image size: ", imgArray.size)
    annotation = np.array(Image.open(filename))
    height = imgArray.shape[0]
    width = imgArray.shape[1]
    depth = imgArray.shape[2]
    img_raw = imgArray.tostring()
    annotation_raw = annotation.tostring()
    example = tf.train.Example(features=tf.train.Features(feature={
        'height': _int64_feature(height),
        'width': _int64_feature(width),
        'depth': _int64_feature(depth),  # for 3rd dimension
        'image_raw': _bytes_feature(img_raw),
        'mask_raw': _bytes_feature(annotation_raw)}))
    writer.write(example.SerializeToString())
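To get a single TFRecord file instead of one per input TIFF, a minimal restructuring (a sketch assuming the same helpers and file list as above; the output name dataset.tfrecord is made up) is to create the writer once before the loop and close it once afterwards:

if not os.path.exists(output):
    os.mkdir(output)

# One writer, created once: every example goes into the same file.
writer = tf.python_io.TFRecordWriter(output + 'dataset.tfrecord')  # hypothetical name
for filename in fileList:
    img = Image.open(filename)
    imgArray = np.zeros((img.n_frames, img.size[1], img.size[0]), np.uint8)
    try:
        for frame in range(img.n_frames):
            img.seek(frame)
            imgArray[frame, :, :] = img
    except EOFError:
        img.seek(0)
    annotation = np.array(Image.open(filename))
    example = tf.train.Example(features=tf.train.Features(feature={
        'height': _int64_feature(imgArray.shape[0]),
        'width': _int64_feature(imgArray.shape[1]),
        'depth': _int64_feature(imgArray.shape[2]),
        'image_raw': _bytes_feature(imgArray.tostring()),
        'mask_raw': _bytes_feature(annotation.tostring())}))
    writer.write(example.SerializeToString())
writer.close()  # close once, after all files have been written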
My current TFRecords Reading script
import tensorflow as tf
import os

def read_and_decode(filename_queue):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        # Defaults are not specified since both keys are required.
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64),
            'height': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64),
            'depth': tf.FixedLenFeature([], tf.int64)
        })
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    label = tf.cast(features['label'], tf.int32)
    height = tf.cast(features['height'], tf.int32)
    width = tf.cast(features['width'], tf.int32)
    depth = tf.cast(features['depth'], tf.int32)
    return image, label, height, width, depth

with tf.Session() as sess:
    filename_queue = tf.train.string_input_producer(["output/A.3.1.tfrecord"])
    image, label, height, width, depth = read_and_decode(filename_queue)
    image = tf.reshape(image, tf.stack([height, width, 3]))
    image.set_shape([32, 32, 3])
    init_op = tf.initialize_all_variables()
    sess.run(init_op)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    for i in range(1000):
        example, l = sess.run([image, label])
        print(example, l)
    coord.request_stop()
    coord.join(threads)
I receive the error:
InvalidArgumentError (see above for traceback): Name: , Feature: label (data type: int64) is required but could not be found.
The images are grayscale, multi-page.
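One thing worth noting (an observation from the code above, not a recorded answer): the writer script stores the annotation under the key 'mask_raw' and never writes a 'label' feature at all, while the reading script declares 'label' as required, which is exactly what the error reports. A sketch of a feature dict matching the keys actually written:

features = tf.parse_single_example(
    serialized_example,
    features={
        # These are the keys the writer script above actually serialized;
        # there is no 'label' key in those records, so don't request one.
        'image_raw': tf.FixedLenFeature([], tf.string),
        'mask_raw': tf.FixedLenFeature([], tf.string),
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'depth': tf.FixedLenFeature([], tf.int64)
    })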

Error when reading data from TFRecord file using string_input_producer

I wrote a script to change MNIST data into TFRecord format:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np

def _init64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

mnist = input_data.read_data_sets("/path/to/data", dtype=tf.uint8, one_hot=True)
images = mnist.train.images
labels = mnist.train.labels
num_examples = mnist.train.num_examples

num_shards = 10
instances_per_shard = int(num_examples / num_shards)

idx = 0
for i in range(num_shards):
    filename = '/tmp/mnist/tfrecord-%.2d' % i
    writer = tf.python_io.TFRecordWriter(filename)
    for j in range(instances_per_shard):
        example = tf.train.Example(features=tf.train.Features(feature={
            'label': _bytes_feature(labels[idx].tostring()),
            'image_raw': _bytes_feature(images[idx].tostring())
        }))
        writer.write(example.SerializeToString())
        idx += 1
    writer.close()
Then I read the data back from the TFRecord files:
import tensorflow as tf

files = tf.train.match_filenames_once('/tmp/mnist/tfrecord-*')
filename_queue = tf.train.string_input_producer(files, shuffle=False)

reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(
    serialized_example,
    features={
        'image_raw': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.string)
    }
)
image = tf.decode_raw(features['image_raw'], tf.uint8)
decode_image = tf.reshape(image, [28, 28, 1])
label = features['label']
# label = tf.decode_raw(features['label'], tf.uint8)
# label = tf.reshape(label, [10])

batch_size = 4
capacity = 1000 + 3 * batch_size
example_batch, label_batch = tf.train.shuffle_batch([decode_image, label], batch_size=batch_size,
                                                    capacity=capacity, min_after_dequeue=30)

with tf.Session() as sess:
    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    sess.run(init_op)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for i in range(4):
        cur_example_batch, cur_label_batch = sess.run([example_batch, label_batch])
        print(cur_label_batch)
    coord.request_stop()
    coord.join(threads)
This all runs fine. But if I uncomment these two lines:
label = tf.decode_raw(features['label'], tf.uint8)
label = tf.reshape(label, [10])
I get the following error:
Caused by op 'shuffle_batch', defined at:
  File "/home/chenk/workspace/tflearn/Learning/create_batch.py", line 27, in <module>
    capacity=capacity, min_after_dequeue=30)
  File "/opt/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/input.py", line 1217, in shuffle_batch
    name=name)
  File "/opt/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/input.py", line 788, in _shuffle_batch
    dequeued = queue.dequeue_many(batch_size, name=name)
  File "/opt/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/data_flow_ops.py", line 457, in dequeue_many
    self._queue_ref, n=n, component_types=self._dtypes, name=name)
  File "/opt/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_data_flow_ops.py", line 946, in _queue_dequeue_many_v2
    timeout_ms=timeout_ms, name=name)
  File "/opt/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op
    op_def=op_def)
  File "/opt/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2506, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/opt/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1269, in __init__
    self._traceback = _extract_stack()

OutOfRangeError (see above for traceback): RandomShuffleQueue '_1_shuffle_batch/random_shuffle_queue' is closed and has insufficient elements (requested 4, current size 0)
    [[Node: shuffle_batch = QueueDequeueManyV2[component_types=[DT_UINT8, DT_UINT8], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](shuffle_batch/random_shuffle_queue, shuffle_batch/n)]]
Is there something wrong in my code? What is the right way to do this?
Thanks!
The MNIST images are in uint8, but the one-hot labels are of type float64. When you write the tfrecords with tostring(), each float64 value is converted to 8 bytes. So when you read the tfrecords back, you should decode the labels as tf.float64. Reading them as uint8 produces 80 values instead of 10, and the error is actually caused by the reshape() call:
label = tf.decode_raw(features['label'], tf.float64)
label = tf.reshape(label, [10])
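Alternatively (a sketch, not taken from the original answer): since the one-hot labels only contain zeros and ones, you could cast them to uint8 at write time, and then the commented-out reading lines work as written:

# In the writer script: store the float64 one-hot vector as uint8 so each
# label serializes to 10 bytes instead of 80.
example = tf.train.Example(features=tf.train.Features(feature={
    'label': _bytes_feature(labels[idx].astype(np.uint8).tostring()),
    'image_raw': _bytes_feature(images[idx].tostring())
}))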

How can I convert TFRecords into numpy arrays?

The main idea is to convert TFRecords into numpy arrays. Assume that the TFRecord stores images. Specifically:
Read a TFRecord File and convert each image into a numpy array.
Write the image into 1.jpg, 2.jpg, etc.
At the same time, write the file name and label to the text file like this:
1.jpg 2
2.jpg 4
3.jpg 5
I currently use the following code:
import tensorflow as tf
import os

def read_and_decode(filename_queue):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        # Defaults are not specified since both keys are required.
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64),
            'height': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64),
            'depth': tf.FixedLenFeature([], tf.int64)
        })
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    label = tf.cast(features['label'], tf.int32)
    height = tf.cast(features['height'], tf.int32)
    width = tf.cast(features['width'], tf.int32)
    depth = tf.cast(features['depth'], tf.int32)
    return image, label, height, width, depth

with tf.Session() as sess:
    filename_queue = tf.train.string_input_producer(["../data/svhn/svhn_train.tfrecords"])
    image, label, height, width, depth = read_and_decode(filename_queue)
    image = tf.reshape(image, tf.pack([height, width, 3]))
    image.set_shape([32, 32, 3])
    init_op = tf.initialize_all_variables()
    sess.run(init_op)
    print(image.eval())
I'm just trying to read at least one image for starters. The code just gets stuck when I run it.
Oops, it was a silly mistake on my part: I used a string_input_producer but forgot to run the queue runners.
with tf.Session() as sess:
    filename_queue = tf.train.string_input_producer(["../data/svhn/svhn_train.tfrecords"])
    image, label, height, width, depth = read_and_decode(filename_queue)
    image = tf.reshape(image, tf.pack([height, width, 3]))
    image.set_shape([32, 32, 3])
    init_op = tf.initialize_all_variables()
    sess.run(init_op)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    for i in range(1000):
        example, l = sess.run([image, label])
        print(example, l)
    coord.request_stop()
    coord.join(threads)
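To finish the stated goal of writing the decoded arrays to 1.jpg, 2.jpg, etc. and logging name/label pairs to a text file, here is a minimal sketch using Pillow; the loop body, the file names, and labels.txt are illustrative additions, not part of the original answer:

from PIL import Image  # Pillow, for writing JPEGs

with tf.Session() as sess:
    filename_queue = tf.train.string_input_producer(["../data/svhn/svhn_train.tfrecords"])
    image, label, height, width, depth = read_and_decode(filename_queue)
    image = tf.reshape(image, tf.pack([height, width, 3]))
    image.set_shape([32, 32, 3])
    sess.run(tf.initialize_all_variables())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    with open("labels.txt", "w") as f:  # hypothetical output file
        for i in range(1000):
            img_array, l = sess.run([image, label])
            name = "%d.jpg" % (i + 1)
            Image.fromarray(img_array).save(name)  # uint8 HxWx3 array -> JPEG
            f.write("%s %d\n" % (name, l))
    coord.request_stop()
    coord.join(threads)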