Does the tf.data.Dataset support to generate dictionary structure? - tensorflow

The following is a piece of code from [https://www.tensorflow.org/programmers_guide/datasets]. In this example, the map function is a user-defined function that reads the data. In the map call, we need to set the output types to [tf.uint8, label.dtype].
import cv2
# Use a custom OpenCV function to read the image, instead of the standard
# TensorFlow `tf.read_file()` operation.
def _read_py_function(filename, label):
    """Read one image in grayscale with OpenCV, for use inside ``tf.py_func``.

    Args:
        filename: Path of the image to read. When called through
            ``tf.py_func`` the string arrives as ``bytes``; a plain ``str``
            is accepted as well.
        label: Label paired with the image; passed through unchanged.

    Returns:
        Tuple ``(image_decoded, label)`` where ``image_decoded`` is the value
        returned by ``cv2.imread`` in ``IMREAD_GRAYSCALE`` mode.
    """
    # The original read from an undefined name (`image_string`); the path to
    # load is the `filename` argument.
    path = filename.decode() if isinstance(filename, bytes) else filename
    image_decoded = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    return image_decoded, label
# Use standard TensorFlow operations to resize the image to a fixed shape.
def _resize_function(image_decoded, label):
    """Resize a decoded image to 28x28 and pass the label through.

    Args:
        image_decoded: Image tensor produced by the ``tf.py_func`` read step.
        label: Label paired with the image; returned unchanged.

    Returns:
        Tuple ``(image_resized, label)``.
    """
    # Declare a static rank-3 shape with unknown dimensions before resizing.
    image_decoded.set_shape([None, None, None])
    image_resized = tf.image.resize_images(image_decoded, [28, 28])
    return image_resized, label
filenames = ["/var/data/image1.jpg", "/var/data/image2.jpg", ...]
labels = [0, 37, 29, 1, ...]

# Pair every file name with its label, read the image through the Python
# function (declaring its output dtypes), then resize with TensorFlow ops.
dataset = (
    tf.data.Dataset.from_tensor_slices((filenames, labels))
    .map(
        lambda filename, label: tuple(
            tf.py_func(
                _read_py_function,
                [filename, label],
                [tf.uint8, label.dtype],
            )
        )
    )
    .map(_resize_function)
)
My question is: if we want _read_py_function() to output a Python dictionary, how do we set the output types? Is there a built-in data type such as tf.dict? For example:
def _read_py_function(filename):
    """Read an image and its label image and return them with metadata.

    Args:
        filename: Indexable record holding, in order, the image path, the
            label-image path, the image id and the image age.

    Returns:
        Dict with keys ``'image'``, ``'label'``, ``'id'`` and ``'age'``.
    """
    image_filename = filename[0]
    label_filename = filename[1]
    image_id = filename[2]
    image_age = filename[3]
    image_decoded = cv2.imread(image_filename, cv2.IMREAD_GRAYSCALE)
    # The original misspelled `label_filename` and overwrote `image_decoded`,
    # leaving `label_decoded` undefined at the return statement.
    label_decoded = cv2.imread(label_filename, cv2.IMREAD_GRAYSCALE)
    return {'image':image_decoded, 'label':label_decoded, 'id':image_id, 'age':image_age}
Then, how do we design the dataset.map() function?

Returning dicts inside the function called by tf.data.Dataset.map should work as expected.
Here is an example:
# Map each element to a dict, then consume that dict in a second map.
dataset = tf.data.Dataset.range(10)
dataset = dataset.map(lambda x: {'a': x, 'b': 2 * x})
dataset = dataset.map(lambda y: y['a'] + y['b'])
res = dataset.make_one_shot_iterator().get_next()
with tf.Session() as sess:
    for i in range(10):
        # 'a' + 'b' is x + 2 * x for the i-th element.
        assert sess.run(res) == 3 * i

To add to the above answer this also works:
# Fetch the dict-structured element directly; `sess.run` returns a dict
# with the same keys.
dataset = tf.data.Dataset.range(10)
dataset = dataset.map(lambda x: {'a': x, 'b': 2 * x})
res = dataset.make_one_shot_iterator().get_next()
with tf.Session() as sess:
    for i in range(10):
        curr_res = sess.run(res)
        assert curr_res['a'] == i
        assert curr_res['b'] == 2 * i

Related

Bad tensor shape when using tensorflow drop_remainder

I am trying to create a tf.data.Dataset from a generator. I want to make sure all of my batches have the exact same size, so I'm calling .batch(batch_size, drop_remainder=True) on my Dataset. Here's the relevant code:
# Wrap the training generator; the declared output shapes carry
# `batch_size` as their first entry.
train_data = tf.data.Dataset.from_generator(
    lambda: map(tuple, train_generator),
    (tf.float32, tf.float32),
    (
        tf.TensorShape([batch_size, crop_height, crop_width, 3]),
        tf.TensorShape([batch_size, crop_height, crop_width, 3]),
    ),
)
# Same wrapping for the validation generator.
val_data = tf.data.Dataset.from_generator(
    lambda: map(tuple, val_generator),
    (tf.float32, tf.float32),
    (
        tf.TensorShape([batch_size, crop_height, crop_width, 3]),
        tf.TensorShape([batch_size, crop_height, crop_width, 3]),
    ),
)
# Batching here groups elements that are themselves batches, which is what
# produces the extra leading dimension in the reported error.
my_train_data = train_data.batch(batch_size, drop_remainder=True)
my_val_data = val_data.batch(batch_size, drop_remainder=True)
But I get this error when I run it:
tensorflow.python.framework.errors_impl.InvalidArgumentError: input must be 4-dimensional[4,4,64,64,48] [Op:FusedBatchNormV3]
I get this error because I'm batching the data twice (batch_size is 4 in my error message). I tried to replace the batch_size with None in the .from_generator command, but I get the same error. If I remove the first argument completely like so:
# Output shapes with the leading batch_size entry removed.
(tf.TensorShape([options["crop_height"], options["crop_width"], 3]),
tf.TensorShape([options["crop_height"], options["crop_width"], 3]),
)
I get this error:
ValueError: `generator` yielded an element of shape (4, 128, 128, 3) where an element of shape (128, 128, 3) was expected.
How can I use drop_remainder without batching the data twice?
EDIT:
Adding code associated with generators:
class BaseGenerator(Sequence):
    """Serve pre-batched ``(X, y)`` arrays built from image/label file names.

    Indexing an instance returns one whole batch. Feeding it to a
    ``tf.data.Dataset`` and calling ``.batch()`` again therefore adds a
    second batch dimension, which is the problem the question describes.
    """

    def __init__(
        self,
        image_filenames,
        label_filenames,
        batch_size=1,
        is_train=True,
        preprocess=None,
        augment=None,
        height=128,
        width=128,
        shuffle=False,
    ):
        """Store the configuration and build the index array.

        Args:
            image_filenames: Sequence of input image paths.
            label_filenames: Sequence of label image paths, aligned with
                ``image_filenames``.
            batch_size: Number of examples per returned batch.
            is_train: Shuffling only happens when this and ``shuffle`` are
                both truthy.
            preprocess: Optional callable applied to each loaded image.
            augment: Optional callable invoked as
                ``augment(image=..., mask=...)`` and expected to return a
                mapping with ``"image"`` and ``"mask"`` entries.
            height: Height used when allocating the batch arrays.
            width: Width used when allocating the batch arrays.
            shuffle: Whether to shuffle the indices (training only).
        """
        self.indices = np.arange(0, len(image_filenames))
        self.image_filenames = np.array(image_filenames)
        self.label_filenames = np.array(label_filenames)
        self.batch_size = batch_size
        self.is_train = is_train
        self.preprocess = preprocess
        self.augment = augment
        self.crop_height = height
        self.crop_width = width
        self.shuffle = shuffle
        self.on_epoch_end()  # shuffle data

    def __len__(self):
        """Return the number of batches, counting a trailing partial one."""
        return int(np.ceil(len(self.indices) / float(self.batch_size)))

    def __getitem__(self, index):
        """Return the ``(X, y)`` batch at position ``index``."""
        min_index = index * self.batch_size
        # Clamp so the last batch does not run past the available indices.
        max_index = min((index + 1) * self.batch_size, len(self.indices))
        batch_indices = self.indices[min_index:max_index]
        return self.generate(self.image_filenames[batch_indices], self.label_filenames[batch_indices])

    def __call__(self):
        """Return the first item obtained by iterating over this object."""
        return next(iter(self))

    def on_epoch_end(self):
        """Shuffle the index array in place when training with shuffling on."""
        if self.is_train and self.shuffle:
            np.random.shuffle(self.indices)

    def generate(self, image_filenames, label_filenames):
        """Load the given files into freshly allocated batch arrays.

        The arrays are always allocated for ``self.batch_size`` rows; rows
        beyond the number of file names passed in stay zero.

        Returns:
            Tuple ``(X, y)`` of ``float32`` arrays.
        """
        X = np.zeros((self.batch_size, self.crop_height, self.crop_width, 3), dtype=np.float32)
        # NOTE(review): `y` is allocated with three dimensions but is indexed
        # with four below (`y[i, :, :, :]`), and the dataset declaration in
        # the question expects a trailing channel dimension. Confirm the
        # shape returned by `helpers.one_hot_it` and size `y` to match.
        y = np.zeros((self.batch_size, self.crop_height, self.crop_width), dtype=np.float32,)
        for i, (image_fn, label_fn) in enumerate(zip(image_filenames, label_filenames)):
            image = utils.load_image(image_fn)
            label = utils.load_image(label_fn)
            if self.augment:
                augmented = self.augment(image=image, mask=label)
                image = augmented["image"]
                label = augmented["mask"]
            if self.preprocess:
                image = self.preprocess(image)
            label = np.float32(helpers.one_hot_it(label=label))
            X[i, :, :, :] = image
            y[i, :, :, :] = label
        return X, y
# Training generator: reads the training file lists.
train_generator = BaseGenerator(
    image_filenames=train_input_names, label_filenames=train_output_names,
    batch_size=batch_size, is_train=True,
    preprocess=preprocessing, augment=None,
    height=128, width=128,
)
# Validation generator: same settings, but flagged as non-training.
val_generator = BaseGenerator(
    image_filenames=val_input_names, label_filenames=val_output_names,
    batch_size=batch_size, is_train=False,
    preprocess=preprocessing, augment=None,
    height=128, width=128,
)
As you mentioned in the question, the issue is that you are batching your data twice. To overcome this problem, you can:
First, define a generator that yields single images (e.g. without batch dimension).
Then, group your examples into batches using the method batch of tf.data.Dataset.
In order to redefine BaseGenerator so that it yields single images, you can follow the next steps.
First, in the __init__ method, remove batch_size because it is no longer needed:
def __init__(
    self,
    image_filenames,
    label_filenames,
    is_train=True,
    preprocess=None,
    augment=None,
    height=128,
    width=128,
    shuffle=False,
):
    """Store the configuration for a generator that yields single examples.

    Args:
        image_filenames: Sequence of input image paths.
        label_filenames: Sequence of label image paths, aligned with
            ``image_filenames``.
        is_train: Stored as ``self.is_train``.
        preprocess: Optional callable, stored as ``self.preprocess``.
        augment: Optional callable, stored as ``self.augment``.
        height: Stored as ``self.crop_height``.
        width: Stored as ``self.crop_width``.
        shuffle: Stored as ``self.shuffle``.
    """
    # NOTE(review): `batch_size` is no longer stored, but the `__len__` of
    # the original class reads `self.batch_size`; it needs a matching change
    # (e.g. returning `len(self.indices)`).
    self.indices = np.arange(0, len(image_filenames))
    self.image_filenames = np.array(image_filenames)
    self.label_filenames = np.array(label_filenames)
    self.is_train = is_train
    self.preprocess = preprocess
    self.augment = augment
    self.crop_height = height
    self.crop_width = width
    self.shuffle = shuffle
    self.on_epoch_end()  # shuffle data
Second, adapt the method generate so that it yields a single example:
def generate(self, image_filename, label_filename):
    """Load, optionally augment and preprocess a single example.

    Args:
        image_filename: Path of the input image.
        label_filename: Path of the label image.

    Returns:
        Tuple ``(X, y)`` for one example, without a batch dimension.
    """
    image = utils.load_image(image_filename)
    label = utils.load_label(label_filename)
    if self.augment:
        augmented = self.augment(image=image, mask=label)
        image = augmented["image"]
        label = augmented["mask"]
    if self.preprocess:
        image = self.preprocess(image)
    label = np.float32(helpers.one_hot_it(label=label))
    X = image  # expected shape: (self.crop_height, self.crop_width, 3)
    # The original bound the label to `Y` but returned `y`, which raised
    # NameError; use one consistent name.
    y = label  # expected shape: (self.crop_height, self.crop_width)
    return X, y
Third, in the method __getitem__, pass only one filename:
def __getitem__(self, index):
    """Return the single ``(X, y)`` example stored at position ``index``."""
    return self.generate(self.image_filenames[index], self.label_filenames[index])
Finally, exclude the batch dimension when defining your tf.data.Dataset:
# Per-example shapes: no batch dimension, since batching is left to
# `Dataset.batch` below.
train_data = tf.data.Dataset.from_generator(
    lambda: map(tuple, train_generator),
    output_types=(tf.float32, tf.float32),
    output_shapes=(
        tf.TensorShape([crop_height, crop_width, 3]),
        tf.TensorShape([crop_height, crop_width]),
    ),
)
my_train_data = train_data.batch(batch_size, drop_remainder=True)

# Pull one batch to inspect the resulting shapes.
it = iter(my_train_data)
x, y = next(it)
print(x.shape) # (4, 128, 128, 3)
print(y.shape) # (4, 128, 128)

InvalidArgumentError (see above for traceback): indices[47,6] = 24 is not in [0, 23)

I am trying to run the following main.py file and I continuously get the error "InvalidArgumentError (see above for traceback): indices[138,4] = 23 is not in [0, 23)". I have checked my vocab file. It has exactly 23 words in it.
The code works fine when a single line of new data is inserted, but when the data is continuous or larger, this error appears. Please help me fix this issue.
Below is a small snippet of my code . The line "word_embeddings = tf.nn.embedding_lookup(variable, word_ids)" is where the error comes.
def model_fn(features, labels, mode, params):
    """Estimator model function: char-LSTM + word embeddings + BiLSTM + CRF.

    Args:
        features: Either ``((words, nwords), (chars, nchars))`` or, for
            serving, a dict with keys ``'words'``, ``'nwords'``, ``'chars'``
            and ``'nchars'``.
        labels: Tag strings, looked up in the tags vocabulary (unused in
            PREDICT mode).
        mode: A ``tf.estimator.ModeKeys`` value.
        params: Dict read for ``'dropout'``, ``'words'``, ``'chars'``,
            ``'tags'``, ``'glove'``, ``'num_oov_buckets'``, ``'dim'``,
            ``'dim_chars'``, ``'char_lstm_size'`` and ``'lstm_size'``.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for PREDICT, EVAL or TRAIN mode.
    """
    # For serving features are a bit different
    if isinstance(features, dict):
        features = ((features['words'], features['nwords']),
                    (features['chars'], features['nchars']))

    # Read vocabs and inputs
    (words, nwords), (chars, nchars) = features
    dropout = params['dropout']
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        # Line numbers of every tag other than 'O'; passed to the metrics.
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1
    with Path(params['chars']).open() as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']

    # Char Embeddings
    char_ids = vocab_chars.lookup(chars)
    variable = tf.get_variable(
        'chars_embeddings', [num_chars, params['dim_chars']], tf.float32)
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout,
                                        training=training)

    # Char LSTM
    dim_words = tf.shape(char_embeddings)[1]
    dim_chars = tf.shape(char_embeddings)[2]
    flat = tf.reshape(char_embeddings, [-1, dim_chars, params['dim_chars']])
    t = tf.transpose(flat, perm=[1, 0, 2])
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    _, (_, output_fw) = lstm_cell_fw(t, dtype=tf.float32,
                                     sequence_length=tf.reshape(nchars, [-1]))
    _, (_, output_bw) = lstm_cell_bw(t, dtype=tf.float32,
                                     sequence_length=tf.reshape(nchars, [-1]))
    output = tf.concat([output_fw, output_bw], axis=-1)
    # Forward and backward states are concatenated, so the feature size is
    # twice the char LSTM size (was a hard-coded 50, i.e. 2 * 25).
    char_embeddings = tf.reshape(
        output, [-1, dim_words, 2 * params['char_lstm_size']])

    # Word Embeddings
    word_ids = vocab_words.lookup(words)
    glove = np.load(params['glove'])['embeddings']  # np.array
    # One extra all-zero row is appended after the GloVe rows.
    # NOTE(review): this assumes the GloVe matrix has exactly one row per
    # line of the words vocabulary and a single OOV bucket; any id beyond
    # that makes `embedding_lookup` fail as in the reported error — confirm
    # that glove.npz was built from the current vocab file.
    variable = np.vstack([glove, [[0.] * params['dim']]])
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

    # Concatenate Word and Char Embeddings
    embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # LSTM
    t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # CRF
    logits = tf.layers.dense(output, num_tags)
    crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {
            'pred_ids': pred_ids,
            'tags': pred_strings
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)

        # Metrics
        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(tags, pred_ids, num_tags, indices, weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        for metric_name, op in metrics.items():
            # Each metric is a pair; its second entry is what gets summarised.
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, eval_metric_ops=metrics)

        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, train_op=train_op)
if __name__ == '__main__':
    # Params
    params = {
        'dim': 300,
        'dim_chars': 100,
        'dropout': 0.5,
        'num_oov_buckets': 1,
        'epochs': 25,
        'batch_size': 20,
        'buffer': 30000000,
        'char_lstm_size': 25,
        'lstm_size': 100,
        'words': str(Path(DATADIR, 'vocab.words.txt')),
        'chars': str(Path(DATADIR, 'vocab.chars.txt')),
        'tags': str(Path(DATADIR, 'vocab.tags.txt')),
        'glove': str(Path(DATADIR, 'glove.npz'))
    }
    # Write the parameters used for this run to a JSON file.
    with Path('results1/params.json').open('w') as f:
        json.dump(params, f, indent=4, sort_keys=True)
# Excerpt: the same lines as the "Word Embeddings" step of model_fn, where
# the reported error is raised by the embedding_lookup call.
# Word Embeddings
word_ids = vocab_words.lookup(words)
glove = np.load(params['glove'])['embeddings'] # np.array
variable = np.vstack([glove, [[0.] * params['dim']]])
variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
word_embeddings = tf.nn.embedding_lookup(variable, word_ids)
Hope this is not too late for you.
I have been googling this issue for a while, hopefully got the root of it and turns out it was quite simple. Similar issues unsolved were here and here.
Chances are: You have seen an example of this embeddings code somewhere and tried to follow it (this was the case for me). However, the case is that coders and tensorflow assume that the id's for the inputs are sequential. I.e. that if you have 1000 items for example, then your id's are [0,1,2,3..998,999].
However, this is usually not the case with real data, where ids are something like "xYzVryCmplxNm5m3r" (in this case, it will give an error because there are characters in the id and tensorflow will not accept that; it only accepts integers), or, in the very subtle case that is probably your case, the ids are actually integers but not sequential. For example, they can look like: ids=[68632548, 15323, ....].
In this case, tensorflow will accept the input data (because it's integers as expected) and give you this error, because the numbers are not sequential and actually much larger than the number of unique id's (this number+1 is usually set to be the limit for the vocab size).
The solution that worked for me was to map all the id values in the original dataframe to sequential id's, preserving their uniqueness, and then input the same data again (it actually worked !).
The code could be something like:
# Map arbitrary ids onto the dense range 0 .. n_unique - 1 so they can be
# used directly as embedding indices.
unique_ids = np.unique(old_ids)
sqeuential_ids = list(range(len(unique_ids)))  # (sic) original name kept
id_mapping_dict = dict(zip(unique_ids, sqeuential_ids))


def map_ids_to_sequential(original_id):
    """Return the dense id assigned to ``original_id``.

    Raises:
        KeyError: If ``original_id`` was not present in ``old_ids``.
    """
    return id_mapping_dict[original_id]


df['ids'] = df['ids'].apply(map_ids_to_sequential)

Data generated with Tensorflow Dataset.from_generator results in error when iterator.get_next() is called on it

I'm new to Tensorflow. I followed some online posts and wrote code to get data from a generator.
The code looks like this:
def gen(my_list_of_files):
    """Yield ``(features, labels)`` pairs from JSON-lines files.

    Args:
        my_list_of_files: Iterable of paths. Every non-blank line of each
            file must be a JSON object with ``'features'`` and ``'labels'``
            keys.

    Yields:
        Tuple ``(features, labels)`` for each JSON line, in file order.
    """
    for fl in my_list_of_files:
        with open(fl) as f:
            # Iterate the file lazily instead of loading it with readlines().
            for line in f:
                if not line.strip():
                    # Blank lines (e.g. a trailing newline) hold no record.
                    continue
                json_line = json.loads(line)
                features = json_line['features']
                labels = json_line['labels']
                yield features, labels
def get_dataset(my_list_of_files=()):
    """Build a dataset of ``(features, labels)`` pairs from ``gen``.

    Args:
        my_list_of_files: Paths forwarded to ``gen``. The original called
            ``gen()`` with no argument although ``gen`` requires the file
            list, which raises ``TypeError`` once the generator is started.

    Returns:
        A ``tf.data.Dataset`` whose two components are declared float32.
    """
    def generator():
        return gen(my_list_of_files)

    return tf.data.Dataset.from_generator(generator, (tf.float32, tf.float32))
def get_input():
    """Shuffle, repeat, unbatch and re-batch the dataset; return one batch.

    Returns:
        Tuple ``(features, labels)`` of tensors from a one-shot iterator.
    """
    dataset = get_dataset()
    dataset = dataset.shuffle(buffer_size=buffer_size)
    # NOTE(review): kept exactly as posted because this call is what the
    # question is about. Unbatching requires every component to be
    # non-scalar, while the yielded label is a scalar — consistent with the
    # reported "non-scalar value" error. Confirm whether unbatching is
    # needed at all.
    dataset = dataset.repeat().unbatch(tf.contrib.data.unbatch())
    dataset = dataset.batch(batch_size, drop_remainder=False)
    # This is where the problem is
    features, labels = dataset.make_one_shot_iterator().get_next()
    return features, labels
When I run this, I get the error:
InvalidArgumentError (see above for traceback): Input element must have a non-scalar value in each component.
[[node IteratorGetNext (defined at /blah/blah/blah) ]]
Values I'm yielding look like:
[1, 2, 3, 4, 5, 6] # features
7 # label
My understanding of the error was that it cannot iterate over the dataset because it is not a vector. Is my understanding correct? How do I fix this?
{
"features": ["1","2"],
"labels": "2"
}
I don't see your error when I execute this code.
def gen():
    """Yield the single ``(features, labels)`` pair stored in ``jsondataset``.

    The file is read from the current working directory and must contain
    one JSON object with ``'features'`` and ``'labels'`` keys.
    """
    with open('jsondataset') as f:
        data = json.load(f)
        features = data['features']
        labels = data['labels']
        print( features)
        yield features, labels
def get_dataset():
    """Return a dataset of ``(features, labels)`` pairs produced by ``gen``.

    Both components are declared as ``tf.float32``.
    """
    # `gen` is already a zero-argument callable, so no lambda wrapper is
    # needed.
    return tf.data.Dataset.from_generator(gen, (tf.float32, tf.float32))
def get_input():
    """Shuffle and batch the dataset, then print the first batch."""
    dataset = get_dataset()
    dataset = dataset.shuffle(buffer_size=5)
    dataset = dataset.batch(5, drop_remainder=False)
    # The call the question reported as failing. The local is named
    # `iterator` so it no longer shadows the builtin `iter`.
    iterator = dataset.make_one_shot_iterator()
    features, labels = iterator.get_next()
    with tf.Session() as sess:
        print(sess.run([features,labels]))
def main():
    """Entry point: build the input pipeline and print one batch."""
    get_input()


if __name__ == "__main__":
    main()
[array([[1., 2.]], dtype=float32), array([2.], dtype=float32)]

Tensorflow export strategy input function for bow_encoder

I am trying to implement serving function to be able to make predictions on saved text classification model. As I understood the goal is to create a function which will do almost exactly same as train_input_fn/eval_input_fn? I have following implementation of those functions:
def generate_training_input_fn(filename):
    """Read the training CSV, fit the vocabulary and encode the documents.

    Args:
        filename: Sequence whose first entry is the path of a header-less
            CSV file with the label in column 0 and the text in column 1.

    Returns:
        Tuple ``(features, y_train)``: the bag-of-words encoder output and
        the label column.
    """
    train_raw = pd.read_csv(filename[0], header=None)
    x_train = train_raw.iloc[:, 1]
    y_train = train_raw.iloc[:, 0]
    vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(MAX_DOCUMENT_LENGTH)
    x_train = np.array(list(vocab_processor.fit_transform(x_train)))
    n_words = len(vocab_processor.vocabulary_)
    # Save a vocabulary list to file. Needed by the serving_input_fn for exporting the model.
    with open('vocab_processor.pickle', 'wb') as f:
        pickle.dump(vocab_processor, f)
    features = tf.contrib.layers.bow_encoder(
        x_train, vocab_size=n_words, embed_dim=EMBEDDING_SIZE)
    return features, y_train
def generate_eval_input_fn(filename):
    """Read the evaluation CSV and encode it with the saved vocabulary.

    Args:
        filename: Sequence whose first entry is the path of a header-less
            CSV file with the label in column 0 and the text in column 1.

    Returns:
        Tuple ``(features, y_eval)``: the bag-of-words encoder output and
        the label column.
    """
    eval_raw = pd.read_csv(filename[0], header=None)
    x_eval = eval_raw.iloc[:, 1]
    y_eval = eval_raw.iloc[:, 0]
    # NOTE: unpickling executes arbitrary code; only load a
    # 'vocab_processor.pickle' that this program wrote itself.
    with open('vocab_processor.pickle', 'rb') as f:
        vocab_processor = pickle.load(f)
    n_words = len(vocab_processor.vocabulary_)
    # `transform` (not `fit_transform`): reuse the loaded vocabulary.
    x_eval = np.array(list(vocab_processor.transform(x_eval)))
    features = tf.contrib.layers.bow_encoder(
        x_eval, vocab_size=n_words, embed_dim=EMBEDDING_SIZE)
    # One-hot labels were left disabled: labels = tf.one_hot(y_eval, 15, 1, 0)
    return features, y_eval
There is comment "Save a vocabulary list to file. Needed by the serving_input_fn for exporting the model" but there is no such function implemented and Experiment is created with export_strategies=None ( got this code from another coder ). I've tried to implement serving_input_fn like in census tutorial
def csv_serving_input_fn():
    """Serving input function that accepts raw CSV rows as strings.

    Returns:
        ``tf.contrib.learn.InputFnOps`` holding the parsed features, no
        labels, and the ``'csv_row'`` placeholder as the default input.
    """
    csv_row = tf.placeholder(shape=[None],dtype=tf.string)
    # `parse_csv` is not defined in the visible code; it must turn the row
    # strings into the features the model expects.
    features = parse_csv(csv_row)
    return tf.contrib.learn.InputFnOps(features, None, {'csv_row': csv_row})
but have no idea how to implement parse_csv since my generate_eval_input_fn takes whole csv as pandas DataFrame

How to set a number for epoch in tf.python_io.tf_record_iterator

I was trying to iterate over my data set several times. I used a tf.python_io.tf_record_iterator. But, I used it as follows:
# The iterator is created once, outside both loops, so it is exhausted
# after the first pass of the inner loop and later outer iterations see
# no records (the behaviour the question describes).
record_iterator = tf.python_io.tf_record_iterator(path=tfrecords_filename)
for z in range(4):
    for k, string_record in enumerate(record_iterator):
        ...  # per-record processing elided in the original post
Hence, the outer loop has no effect, and iteration finished just after the inner loop was done iterating over the dataset.
Any help is much appreciated!!
Finally, the new tensorflow Dataset api encoded this functionality. The full documentation is found at: https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset.
Long story short, this new api will enable the end user to iterate over his database multiple times using a for loop, or using the repeat() from the Dataset class.
Here is complete code on how I have used this API:
import tensorflow as tf
import numpy as np
import time
import cv2
num_epoch = 2
batch_size = 8  # examples taken from each file per step
num_threads = 9
common = "C:/Users/user/PycharmProjects/AffectiveComputingNew/database/"
# Nine TFRecord shards, train_1_db .. train_9_db, under `common`.
filenames = [common + "train_%d_db.tfrecords" % shard for shard in range(1, 10)]
# Transforms a scalar string `example_proto` into a pair of a float feature
# vector and the decoded image stored in the same record.
def _parse_function(example_proto):
    """Parse one serialized example into ``(features, image)``.

    Args:
        example_proto: Scalar string tensor holding a serialized example
            with ``'height'``, ``'width'``, ``'image_raw'`` and
            ``'features'`` entries.

    Returns:
        Tuple ``(features, image)``: the 432-element float32 feature vector
        and the uint8 image reshaped to ``[height, width, 3]``.
    """
    feature_spec = {
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'image_raw': tf.FixedLenFeature([], tf.string),
        'features': tf.FixedLenFeature([432], tf.float32)
    }
    parsed_features = tf.parse_single_example(example_proto, feature_spec)
    # The image bytes are stored flattened; decode them to uint8 first.
    image = tf.decode_raw(parsed_features['image_raw'], tf.uint8)
    # Height and width are needed to restore the original image layout.
    height = tf.cast(parsed_features['height'], tf.int32)
    width = tf.cast(parsed_features['width'], tf.int32)
    image = tf.reshape(image, [height, width, 3])
    features = parsed_features['features']
    return features, image
# Variables that hold a full-size batch; used as the fallback input when the
# current batch does not have the expected size.
random_features = tf.Variable(tf.zeros([72, 432], tf.float32))
random_images = tf.Variable(tf.zeros([72, 112, 112, 3], tf.uint8))

# One parsed dataset per TFRecord file. The original used `_` as a real loop
# variable and indexed the nine datasets by hand.
datasets = [
    tf.contrib.data.TFRecordDataset(path).map(_parse_function)
    for path in filenames
]
dataset_ziped = tf.contrib.data.TFRecordDataset.zip(tuple(datasets))
dataset = dataset_ziped.batch(batch_size)
iterator = dataset.make_initializable_iterator()
# One (features, images) pair per input file.
next_batch = iterator.get_next()
# Stack the per-file batches along the first axis, in file order.
features = tf.concat(tuple(pair[0] for pair in next_batch), axis=0)
images = tf.concat(tuple(pair[1] for pair in next_batch), axis=0)
def get_features(features, images):
    """Store the batch in the holder variables and interleave it by file.

    The inputs are laid out as 9 consecutive groups of 8 rows. The
    reshape/transpose/reshape sequence reorders them so that rows cycle
    through the 9 groups.

    Args:
        features: Tensor reshaped below to ``[9, 8, 432]``.
        images: Tensor reshaped below to ``[9, 8, 112, 112, 3]``.

    Returns:
        Tuple ``(features, images)`` with shapes ``[72, 432]`` and
        ``[72, 112, 112, 3]``.
    """
    # The assignments run before the reshapes, so the variables always hold
    # the values this call was given.
    with tf.control_dependencies([tf.assign(random_features, features), tf.assign(random_images, images)]):
        features = tf.reshape(features, shape=[9, 8, 432])  # where 8 * 9 = 72
        features = tf.transpose(features, perm=[1, 0, 2])  # shape becomes: [8, 9, 432]
        features = tf.reshape(features, shape=[72, 432])  # Now frames will be: 1st frame from 1st video, second from second video...
        images = tf.reshape(images, shape=[9, 8, 112, 112, 3])
        images = tf.transpose(images, perm=[1, 0, 2, 3, 4])
        images = tf.reshape(images, shape=[72, 112, 112, 3])
        return features, images
# A batch is "full" when both tensors have batch_size * 9 rows.
condition1 = tf.equal(tf.shape(features)[0], batch_size * 9)
condition2 = tf.equal(tf.shape(images)[0], batch_size * 9)
condition = tf.logical_and(condition1, condition2)
# Full batch: process it. Otherwise: process the contents of the holder
# variables instead.
features, images = tf.cond(condition,
                           lambda: get_features(features, images),
                           lambda: get_features(random_features, random_images))
init_op = tf.global_variables_initializer()
with tf.Session() as sess:
    # Initialize the variables once, before any epoch.
    sess.run(init_op)
    for _ in range(num_epoch):
        # Re-initializing the iterator rewinds the dataset for this epoch.
        sess.run(iterator.initializer)
        # Loop until the dataset signals the end of the current epoch.
        while True:
            try:
                features_np, images_np = sess.run([features, images])
            except tf.errors.OutOfRangeError:
                print('errorrrrr')
                break
            # Print the first feature value of every row in the batch.
            lst = [f[0] for f in features_np]
            print(lst)
One caveat: the last retrieved batch may be truncated, which would cause a problem (notice the reshape operations I apply to the features). Therefore, I use temporary variables that are updated with the batch whenever its size equals (batch_size * 9), and that are used in its place otherwise. (This is not important for now.)