I was trying to iterate over my data set several times. I used a tf.python_io.tf_record_iterator. But, I used it as follows:
record_iterator = tf.python_io.tf_record_iterator(path=tfrecords_filename)
for z in range(4):
for k, string_record in enumerate(record_iterator):
Hence, the outer loop has no effect, and iteration finished just after the inner loop was done iterating over the dataset.
Any help is much appreciated!!

Finally, the new tensorflow Dataset api encoded this functionality. The full documentation is found at:
Long story short, this new API lets you iterate over a dataset multiple times, either with a plain for loop or with repeat() from the Dataset class.
Here is complete code on how I have used this API:
import tensorflow as tf
import numpy as np
import time
import cv2
num_epoch = 2
batch_size = 8 # This is set to 8 since
num_threads = 9
common = "C:/Users/user/PycharmProjects/AffectiveComputingNew/database/"
filenames = [(common + "train_1_db.tfrecords"), (common + "train_2_db.tfrecords"), (common + "train_3_db.tfrecords"),
(common + "train_4_db.tfrecords"), (common + "train_5_db.tfrecords"), (common + "train_6_db.tfrecords"),
(common + "train_7_db.tfrecords"), (common + "train_8_db.tfrecords"), (common + "train_9_db.tfrecords")]
# Transforms a scalar string `example_proto` into a (features, image) pair:
# the stored float32 feature vector and the decoded uint8 image.
def _parse_function(example_proto):
    """Parse one serialized Example into ``(features, image)``.

    Args:
        example_proto: scalar string tensor holding one serialized Example
            with the keys 'height', 'width', 'image_raw' and 'features'.

    Returns:
        A tuple ``(features, image)``: the float32 tensor of shape [432]
        stored under 'features', and the uint8 image tensor reshaped to
        [height, width, 3].
    """
    feature_spec = {
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'image_raw': tf.FixedLenFeature([], tf.string),
        'features': tf.FixedLenFeature([432], tf.float32),
    }
    # This is how we create one example, that is, extract one example from
    # the database.
    parsed_features = tf.parse_single_example(example_proto, feature_spec)
    image = tf.decode_raw(parsed_features['image_raw'], tf.uint8)
    # The image is flattened when stored in binary form, so the stored height
    # and width are needed to restore its original layout.
    height = tf.cast(parsed_features['height'], tf.int32)
    width = tf.cast(parsed_features['width'], tf.int32)
    image = tf.reshape(image, [height, width, 3])
    features = parsed_features['features']
    return features, image
random_features = tf.Variable(tf.zeros([72, 432], tf.float32))
random_images = tf.Variable(tf.zeros([72, 112, 112, 3], tf.uint8))
datasets = []
for _ in filenames:
dataset_ziped =[0], datasets[1], datasets[2], datasets[3],
datasets[4], datasets[5], datasets[6], datasets[7], datasets[8]))
dataset = dataset_ziped.batch(batch_size)
iterator = dataset.make_initializable_iterator()
next_batch = iterator.get_next() # This has shape: [9, 2]
features = tf.concat((next_batch[0][0], next_batch[1][0], next_batch[2][0], next_batch[3][0],
next_batch[4][0], next_batch[5][0], next_batch[6][0], next_batch[7][0],
next_batch[8][0]), axis=0)
images = tf.concat((next_batch[0][1], next_batch[1][1], next_batch[2][1], next_batch[3][1],
next_batch[4][1], next_batch[5][1], next_batch[6][1], next_batch[7][1],
next_batch[8][1]), axis=0)
def get_features(features, images):
    """Cache the batch, then interleave its frames across the 9 sources.

    The incoming tensors are first copied into the module-level variables
    ``random_features`` / ``random_images``; the reshapes only run after
    those assignments.

    Args:
        features: tensor reshapeable to [9, 8, 432].
        images: tensor reshapeable to [9, 8, 112, 112, 3].

    Returns:
        ``(features, images)`` with shapes [72, 432] and [72, 112, 112, 3].
    """
    cache_ops = [
        tf.assign(random_features, features),
        tf.assign(random_images, images),
    ]
    with tf.control_dependencies(cache_ops):
        # 9 sources x 8 frames each, where 8 * 9 = 72.
        feats_by_source = tf.reshape(features, shape=[9, 8, 432])
        # Swap the two leading axes: shape becomes [8, 9, 432].
        feats_by_frame = tf.transpose(feats_by_source, perm=[1, 0, 2])
        # Flattened order: 1st frame from 1st video, then from the 2nd video...
        features = tf.reshape(feats_by_frame, shape=[72, 432])

        imgs_by_source = tf.reshape(images, shape=[9, 8, 112, 112, 3])
        imgs_by_frame = tf.transpose(imgs_by_source, perm=[1, 0, 2, 3, 4])
        images = tf.reshape(imgs_by_frame, shape=[72, 112, 112, 3])
    return features, images
condition1 = tf.equal(tf.shape(features)[0], batch_size * 9)
condition2 = tf.equal(tf.shape(images)[0], batch_size * 9)
condition = tf.logical_and(condition1, condition2)
features, images = tf.cond(condition,
lambda: get_features(features, images),
lambda: get_features(random_features, random_images))
init_op = tf.global_variables_initializer()
with tf.Session() as sess:
# Initialize `iterator` with training data.
for _ in range(num_epoch):
# This while loop will run indefinitly until the end of the first epoch
while True:
lst = []
features_np, images_np =[features, images])
for f in features_np:
except tf.errors.OutOfRangeError:
One caveat: the last batch retrieved can be truncated, which breaks the fixed-shape reshape operations applied to the features. To work around this, I keep temporary variables that are overwritten with the current batch only when its size equals batch_size * 9; a truncated final batch falls back to whatever those variables already hold. (This detail is not important for the main question.)


How to make 2 tensors the same length by mean | median imputation of the shortest tensor?

I'm trying to subclass the base Keras layer to create a layer that merges the rank 1 outputs of the 2 layers of a skip connection by outputting the dot product of the 2 tensors. The 2 incoming tensors are created by Dense layers parsed by a Neural Architecture Search algorithm that randomly selects the number of Dense units, and hence the length of the 2 tensors. These will of course usually not be of the same length. I am running an experiment to see whether casting them to the same length, by appending a mathematically meaningful imputation to the shorter tensor [e.g. mean | median | hypotenuse | cos | ... etc] and then merging them by means of the dot product, will outperform the Add or Concatenate merging strategies. To make them the same length:
I try the overall strategy:
Find the shorter tensor.
Pass it to tf.reduce_mean() (aliasing the resulting mean as "rm" for the sake of discussion).
Create a list of [rm for rm in range(['difference in length of the longer tensor and the shorter tensor']). Cast as a tensor if necessary.
[pad | concatenate] the shorter tensor with the result of the operation above to make it equal in length.
Here is where I am running into a dead wall:
Since the tf operation reduce_mean is returning a future with its shape set as None (not assumed to be a scalar of 1), they are in a state of having a shape of '(None,)', which the tf.keras.layers.Dot layer refuses to ingest and throws a ValueError, as it does not see them as being the same length, though they always will be:
KerasTensor(type_spec=TensorSpec(shape=(None,), dtype=tf.float32, name=None), name='tf.math.reduce_mean/Mean:0', description="created by layer 'tf.math.reduce_mean'")
ValueError: A Concatenate layer should be called on a list of at least 1 input. Received: input_shape=[[(None,), (None,)], [(None, 3)]]
My code (in the package/module):
import tensorflow as tf
import numpy as np
class Linear1dDot(tf.keras.layers.Layer):
    """Dot-product merge of two rank-1 (per sample) tensors of unequal length.

    The shorter input is extended to the length of the longer one by
    appending its own per-sample mean, then the two are merged with
    ``tf.keras.layers.Dot(axes=1)``.
    """

    def __init__(self, input_dim=None, **kwargs):
        """
        Args:
            input_dim: kept for backward compatibility; not used.
            **kwargs: forwarded to ``tf.keras.layers.Layer``.
        """
        super(Linear1dDot, self).__init__(**kwargs)
        self.input_dim = input_dim
        # Created once here so Keras tracks the sub-layer.
        self._dot = tf.keras.layers.Dot(axes=1)

    def call(self, inputs):
        """Pad the shorter inputs with their mean and return the dot product.

        Args:
            inputs: list of tensors shaped (batch, length_i); the lengths
                must be statically known.

        Returns:
            The output of ``Dot(axes=1)`` on the equal-length tensors.
        """
        # Static feature lengths are plain Python ints, so no tf.Variable or
        # graph op is needed to compare them.
        lengths = [int(inp.shape[1]) for inp in inputs]
        max_len = max(lengths)

        equalized = []
        for inp, length in zip(inputs, lengths):
            if length < max_len:
                # keepdims=True gives (batch, 1), which tiles along axis 1
                # into a (batch, max_len - length) block of mean values.
                pad_with = tf.reduce_mean(inp, axis=1, keepdims=True)
                padding = tf.tile(pad_with, [1, max_len - length])
                inp = tf.concat([inp, padding], axis=1)
            equalized.append(inp)
        return self._dot(equalized)
# Alternatively substituting the last few lines with:
pad_with = tf.reduce_mean(inp, axis=1, keepdims=True)
padding = tf.keras.layers.concatenate(
[pad_with for _ in range(max_len - inp_lenght)])
inputs[i] = tf.keras.layers.concatenate([padding, [inp]])
# inputs[i] = tf.reshape(
# tf.pad(inp, padding, mode="constant"), (None, max_len))
return tf.keras.layers.Dot(axes=1)(inputs)
... and countless other permutations of attempts ...
Does anyone know a workaround or have any advice? (other than 'Don't try to do this.')?
In the parent folder of this module's package ...
Test to simulate a skip connection merging into the current layer:
from linearoneddot.linear_one_d_dot import Linear1dDot
x = tf.constant([1, 2, 3, 4, 5])
y = tf.constant([0, 9, 8])
inp1 = tf.keras.layers.Input(shape=3)
inp2 = tf.keras.layers.Input(shape=5)
xd = tf.keras.layers.Dense(3, "relu")(inp1)
yd = tf.keras.layers.Dense(5, 'elu')(inp2)
combined = Linear1dDot()([xd, yd]) # tf.keras.layers.Dot(axes=1)([xd, yd])
z = tf.keras.layers.Dense(2)(combined)
model = tf.keras.Model(inputs=[inp1, inp2], outputs=z) # outputs=z)
print(model([x, y]))
print(model([np.random.random((3, 3)), np.random.random((3, 5))]))
Does anyone know a workaround that gets the mean of the shorter rank 1 tensor as a scalar, which I can then append to the shorter tensor to pad it to a set intended length (the same length as the longer tensor)?
Try this; I hope it works. Pad the shorter input with ones by concatenating the padding onto it, take the dot product with the longer input, and finally subtract the extra ones that were added to the dot product:
class Linear1dDot(tf.keras.layers.Layer):
    """Dot-product merge that pads the shorter of two inputs with ones."""

    def __init__(self, **kwargs):
        # Forward kwargs (name, dtype, ...) instead of silently dropping them.
        super(Linear1dDot, self).__init__(**kwargs)
        self._dot = tf.keras.layers.Dot(axes=1)

    def call(self, inputs):
        """Merge two (batch, length) tensors whose lengths may differ.

        Args:
            inputs: pair ``(_input1, _input2)`` of tensors with statically
                known second dimensions.

        Returns:
            Tensor of shape (batch, 1): the dot product of the longer input
            with the ones-padded shorter input, minus the number of padded
            positions.
        """
        _input1, _input2 = inputs
        _input1_shape = _input1.shape[1]
        _input2_shape = _input2.shape[1]
        difference = abs(_input1_shape - _input2_shape)

        if _input1_shape > _input2_shape:
            longer, shorter = _input1, _input2
        else:
            longer, shorter = _input2, _input1

        # Taking the ones from a slice of the longer input yields a
        # (batch, difference) block with the right dtype, so the concat works
        # for any batch size rather than only for batch size 1.
        padded_input = tf.ones_like(longer[:, :difference])
        padded_tensor = tf.concat([shorter, padded_input], axis=1)
        scaled_output = self._dot([padded_tensor, longer])
        # NOTE(review): this subtracts the count of padded ones, as the
        # original did; the padding actually contributes the sum of the
        # longer input's trailing entries. Confirm which is intended.
        scaled_output -= tf.cast(difference, scaled_output.dtype)
        return scaled_output
x = tf.constant([[1, 2, 3, 4, 5, 9]])
y = tf.constant([[0, 9, 8]])
inp1 = tf.keras.layers.Input(shape=3)
inp2 = tf.keras.layers.Input(shape=5)
xd = tf.keras.layers.Dense(5, "relu")(x)
yd = tf.keras.layers.Dense(3, 'elu')(y)
combined = Linear1dDot()([xd, yd]) # tf.keras.layers.Dot(axes=1)([xd, yd])
<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[4.4694786]], dtype=float32)>

In Pytorch, how to test simple image with my loaded model?

I made an alphabet-classification CNN model using PyTorch, and now I want to test it on a single image that the model has never seen before. I extracted bounding boxes from an image of my handwriting with OpenCV, but I don't know how to feed the result to the model.
bounded my_image
this is custom dataset
class CustomDatasetFromCSV(Dataset):
    """Dataset backed by a CSV file: column 0 is the label, the rest pixels."""

    def __init__(self, csv_path, height, width, transforms=None):
        """
        Args:
            csv_path (string): path to csv file
            height (int): image height
            width (int): image width
            transforms: pytorch transforms for transforms and tensor conversion
        """
        self.data = pd.read_csv(csv_path)
        self.labels = np.asarray(self.data.iloc[:, 0])
        self.height = height
        self.width = width
        self.transforms = transforms

    def __getitem__(self, index):
        """Return ``(image, label)`` for row ``index``.

        The image is the transformed tensor when ``transforms`` was given,
        otherwise the grayscale PIL image itself.
        """
        single_image_label = self.labels[index]
        # Read the row's pixel columns and reshape the 1D array to 2D
        # (height x width; 784 -> 28x28 for the alphabet data).
        pixels = np.asarray(self.data.iloc[index][1:])
        img_as_np = pixels.reshape(self.height, self.width).astype('uint8')
        # Convert image from numpy array to PIL image, mode 'L' is for grayscale
        img_as_img = Image.fromarray(img_as_np).convert('L')
        if self.transforms is None:
            # Previously this path hit an unbound `img_as_tensor`.
            return (img_as_img, single_image_label)
        # Transform image to tensor
        img_as_tensor = self.transforms(img_as_img)
        return (img_as_tensor, single_image_label)

    def __len__(self):
        return len(self.data.index)
transformations = transforms.Compose([
alphabet_from_csv = CustomDatasetFromCSV("/content/drive/My Drive/A_Z Handwritten Data.csv",
28, 28, transformations)
random_seed = 50
data_size = len(alphabet_from_csv)
indices = list(range(data_size))
split = int(np.floor(0.2 * data_size))
if True:
train_indices, test_indices = indices[split:], indices[:split]
train_dataset = SubsetRandomSampler(train_indices)
test_dataset = SubsetRandomSampler(test_indices)
train_loader = = alphabet_from_csv,
batch_size = batch_size,
sampler = train_dataset)
test_loader = = alphabet_from_csv,
batch_size = batch_size,
sampler = test_dataset)
this is my model
class ConvNet3(nn.Module):
    """Two conv/max-pool stages followed by a two-layer classifier head.

    Expects input of shape (N, 1, 28, 28): each 2x2 max-pool halves the
    spatial size (28 -> 14 -> 7), which is where ``56 * 7 * 7`` comes from.
    """

    def __init__(self, num_classes=26):
        """
        Args:
            num_classes: size of the output layer (26 letters by default).
        """
        # nn.Module must be initialised before sub-modules are assigned.
        super(ConvNet3, self).__init__()
        # NOTE(review): no activation layers are visible between the
        # conv/linear layers in this copy of the code - confirm against the
        # trained checkpoint before adding any.
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 28, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(28, 56, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.fc = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(56 * 7 * 7, 512),
            nn.Dropout(p=0.5),
            # Was hard-coded to 26, which ignored the constructor argument.
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return the class scores, shape (N, num_classes)."""
        out = self.layer1(x)
        out = self.layer2(out)
        # Flatten everything except the batch dimension.
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        return out
model = ConvNet3(num_classes).to(device)
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
def train():
    """Run one pass over ``train_loader`` and update ``model`` in place.

    Uses the module-level ``model``, ``loss_func``, ``optimizer``,
    ``device``, ``train_loader`` and ``train_dataset``.
    """
    # train phase: the model contains Dropout, so training mode matters
    model.train()
    batch_loss_list = []
    # create a progress bar
    progress = ProgressMonitor(length=len(train_dataset))
    for batch, target in train_loader:
        # Move the training data to the GPU
        batch, target = batch.to(device), target.to(device)
        # forward propagation
        output = model(batch)
        # calculate the loss
        loss = loss_func(output, target)
        # clear previous gradient computation
        optimizer.zero_grad()
        # backpropagate to compute gradients
        loss.backward()
        # update model weights
        optimizer.step()
        # Record the loss; the running mean below divides by the list length,
        # so the list must be non-empty.
        batch_loss_list.append(loss.item())
        # update progress bar
        progress.update(batch.shape[0], sum(batch_loss_list) / len(batch_loss_list))
def test():
    """Evaluate ``model`` on ``test_loader`` and print the accuracy.

    Uses the module-level ``model``, ``device``, ``test_loader`` and
    ``test_dataset``.
    """
    # test phase: switch Dropout off for evaluation
    model.eval()
    correct = 0
    # We don't need gradients for test, so wrap in
    # no_grad to save memory
    with torch.no_grad():
        for batch, target in test_loader:
            # Move the test batch to the GPU
            batch, target = batch.to(device), target.to(device)
            # forward propagation
            output = model(batch)
            # get prediction
            output = torch.argmax(output, 1)
            # accumulate correct number
            correct += (output == target).sum().item()
    # Calculate test accuracy
    acc = 100 * float(correct) / len(test_dataset)
    print( 'Test accuracy: {}/{} ({:.2f}%)'.format( correct, len(test_dataset), acc ) )
for epoch in range(num_epochs):
print("{}'s try".format(int(epoch)+1))
this is my image to bound
import cv2
import matplotlib.image as mpimg
im = cv2.imread('/content/drive/My Drive/my_handwritten.jpg')
gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5, 5), 0)
thresh = cv2.adaptiveThreshold(blur, 255, 1, 1, 11, 2)
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[1]
for cnt in contours:
x, y, w, h = cv2.boundingRect(cnt)
if h < 20: continue
red = (0, 0, 255)
cv2.rectangle(im, (x, y), (x+w, y+h), red, 2)
cv2.imwrite('my_handwritten_bounding.png', im)
img_result = []
img_for_class = im.copy()
margin_pixel = 60
for rect in rects:
#[y:y+h, x:x+w]
img_for_class[rect[1]-margin_pixel : rect[1]+rect[3]+margin_pixel,
rect[0]-margin_pixel : rect[0]+rect[2]+margin_pixel])
# Draw the rectangles
cv2.rectangle(im, (rect[0], rect[1]),
(rect[0] + rect[2], rect[1] + rect[3]), (0, 0, 255), 2)
count = 0
nrows = 4
ncols = 7
for n in img_result:
count += 1
plt.subplot(nrows, ncols, count)
plt.imshow(cv2.resize(n,(28,28)), cmap='Greys', interpolation='nearest')
You have already written the function test to test your net. The only thing you should do — create batch with one image with same preprocessing as images in your dataset.
def test_one_image(I, model):
    """Classify one grayscale image with ``model``.

    Args:
        I: 2-D uint8 numpy array (28x28 for the alphabet model).
        model: network that accepts a float batch of shape (1, 1, H, W).
            It is switched to eval mode.

    Returns:
        1-element tensor holding the index of the highest-scoring class.
    """
    # test phase
    model.eval()
    # Run on whatever device the model's weights live on.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')
    # Convert image to a float32 tensor scaled by 1/255, then add the batch
    # and channel dims. Plain `I / 255` would give float64, which does not
    # match float32 weights.
    batch = torch.as_tensor(I, dtype=torch.float32) / 255
    batch = batch.unsqueeze(0).unsqueeze(0)
    # We don't need gradients for test, so wrap in
    # no_grad to save memory
    with torch.no_grad():
        batch = batch.to(device)
        # forward propagation
        output = model(batch)
        # get prediction
        output = torch.argmax(output, 1)
    return output


# The name starts with "test", so keep pytest from collecting this helper.
test_one_image.__test__ = False

InvalidArgumentError (see above for traceback): indices[47,6] = 24 is not in [0, 23)

I am trying to run the following file and I continuously get the error "InvalidArgumentError (see above for traceback): indices[138,4] = 23 is not in [0, 23)". I have checked my vocab file; it has exactly 23 words in it.
The code works fine when a single line of new data is inserted, but the error appears when the data is continuous or contains more lines. Please help me fix this issue.
Below is a small snippet of my code . The line "word_embeddings = tf.nn.embedding_lookup(variable, word_ids)" is where the error comes.
def model_fn(features, labels, mode, params):
    """Estimator model function: char-LSTM + word embeddings -> BiLSTM -> CRF.

    Args:
        features: ``((words, nwords), (chars, nchars))``, or a dict with the
            keys 'words', 'nwords', 'chars', 'nchars' (serving).
        labels: tag strings; only read outside PREDICT mode.
        mode: a ``tf.estimator.ModeKeys`` value.
        params: dict with 'dropout', 'words', 'chars', 'tags', 'glove',
            'num_oov_buckets', 'dim', 'dim_chars', 'char_lstm_size' and
            'lstm_size'.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for PREDICT, EVAL or TRAIN.

    Raises:
        ValueError: if the embedding matrix in ``params['glove']`` does not
            have exactly one row per line of ``params['words']``.
    """
    # For serving features are a bit different
    if isinstance(features, dict):
        features = ((features['words'], features['nwords']),
                    (features['chars'], features['nchars']))

    # Read vocabs and inputs
    (words, nwords), (chars, nchars) = features
    dropout = params['dropout']
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1
    with Path(params['chars']).open() as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']

    # Char Embeddings
    char_ids = vocab_chars.lookup(chars)
    variable = tf.get_variable(
        'chars_embeddings', [num_chars, params['dim_chars']], tf.float32)
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout,
                                        training=training)

    # Char LSTM
    dim_words = tf.shape(char_embeddings)[1]
    dim_chars = tf.shape(char_embeddings)[2]
    flat = tf.reshape(char_embeddings, [-1, dim_chars, params['dim_chars']])
    t = tf.transpose(flat, perm=[1, 0, 2])
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    _, (_, output_fw) = lstm_cell_fw(t, dtype=tf.float32,
                                     sequence_length=tf.reshape(nchars, [-1]))
    _, (_, output_bw) = lstm_cell_bw(t, dtype=tf.float32,
                                     sequence_length=tf.reshape(nchars, [-1]))
    output = tf.concat([output_fw, output_bw], axis=-1)
    # Forward + backward outputs are concatenated, hence 2 * char_lstm_size
    # (this was a hard-coded 50, only valid for char_lstm_size == 25).
    char_embeddings = tf.reshape(
        output, [-1, dim_words, 2 * params['char_lstm_size']])

    # Word Embeddings
    word_ids = vocab_words.lookup(words)
    glove = np.load(params['glove'])['embeddings']  # np.array
    # The lookup table yields ids in [0, num_words + num_oov_buckets), so the
    # embedding matrix needs exactly that many rows. A stale glove file
    # otherwise surfaces later as "indices[...] is not in [0, N)".
    with Path(params['words']).open() as f:
        num_words = sum(1 for _ in f)
    if glove.shape[0] != num_words:
        raise ValueError(
            'Embedding matrix {!r} has {} rows but vocab {!r} has {} entries; '
            'rebuild the embeddings from the current vocab file.'.format(
                params['glove'], glove.shape[0], params['words'], num_words))
    # One all-zero row per OOV bucket (was always exactly one row).
    oov_rows = np.zeros((params['num_oov_buckets'], params['dim']))
    variable = np.vstack([glove, oov_rows])
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

    # Concatenate Word and Char Embeddings
    embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # Word-level bidirectional LSTM
    t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # CRF
    logits = tf.layers.dense(output, num_tags)
    crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {
            'pred_ids': pred_ids,
            'tags': pred_strings,
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    # Loss
    vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
    tags = vocab_tags.lookup(labels)
    log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
        logits, tags, nwords, crf_params)
    loss = tf.reduce_mean(-log_likelihood)

    # Metrics
    weights = tf.sequence_mask(nwords)
    metrics = {
        'acc': tf.metrics.accuracy(tags, pred_ids, weights),
        'precision': precision(tags, pred_ids, num_tags, indices, weights),
        'recall': recall(tags, pred_ids, num_tags, indices, weights),
        'f1': f1(tags, pred_ids, num_tags, indices, weights),
    }
    for metric_name, op in metrics.items():
        # Each metric is a pair; the second element is what gets logged.
        tf.summary.scalar(metric_name, op[1])

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode, loss=loss, eval_metric_ops=metrics)
    elif mode == tf.estimator.ModeKeys.TRAIN:
        train_op = tf.train.AdamOptimizer().minimize(
            loss, global_step=tf.train.get_or_create_global_step())
        return tf.estimator.EstimatorSpec(
            mode, loss=loss, train_op=train_op)
if __name__ == '__main__':
# Params
params = {
'dim': 300,
'dim_chars': 100,
'dropout': 0.5,
'num_oov_buckets': 1,
'epochs': 25,
'batch_size': 20,
'buffer': 30000000,
'char_lstm_size': 25,
'lstm_size': 100,
'words': str(Path(DATADIR, 'vocab.words.txt')),
'chars': str(Path(DATADIR, 'vocab.chars.txt')),
'tags': str(Path(DATADIR, 'vocab.tags.txt')),
'glove': str(Path(DATADIR, 'glove.npz'))
with Path('results1/params.json').open('w') as f:
json.dump(params, f, indent=4, sort_keys=True)
# Word Embeddings
word_ids = vocab_words.lookup(words)
glove = np.load(params['glove'])['embeddings'] # np.array
variable = np.vstack([glove, [[0.] * params['dim']]])
variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
word_embeddings = tf.nn.embedding_lookup(variable, word_ids)
Hope this is not too late for you.
I have been googling this issue for a while, hopefully got the root of it and turns out it was quite simple. Similar issues unsolved were here and here.
Chances are: You have seen an example of this embeddings code somewhere and tried to follow it (this was the case for me). However, the case is that coders and tensorflow assume that the id's for the inputs are sequential. I.e. that if you have 1000 items for example, then your id's are [0,1,2,3..998,999].
However, this is usually not the case with real data, where ids are something like "xYzVryCmplxNm5m3r" (in this case it will give an error because there are characters in the id and TensorFlow only accepts integers) or, in the very subtle case that is probably yours, the ids are actually integers but not sequential. For example, they can look like: ids=[68632548, 15323, ....].
In this case, tensorflow will accept the input data (because it's integers as expected) and give you this error, because the numbers are not sequential and actually much larger than the number of unique id's (this number+1 is usually set to be the limit for the vocab size).
The solution that worked for me was to map all the id values in the original dataframe to sequential id's, preserving their uniqueness, and then input the same data again (it actually worked !).
The code could be something like:
# New ids 0..N-1, one per original id, in the order of `unique_ids`.
sqeuential_ids = list(range(len(unique_ids)))
# Lookup table original id -> sequential id; this was referenced below but
# never defined.
id_mapping_dict = dict(zip(unique_ids, sqeuential_ids))


def map_ids_to_sequential(original_id):
    """Return the sequential id assigned to ``original_id``.

    Raises:
        KeyError: if ``original_id`` is not one of ``unique_ids``.
    """
    return id_mapping_dict[original_id]

Does the support to generate dictionary structure?

The following is a piece of code from []. In this example, the map function is a user-defined function to read the data. And in the map function, we need to set the output types are [tf.uint8, label.dtype].
import cv2
# Use a custom OpenCV function to read the image, instead of the standard
# TensorFlow `tf.read_file()` operation.
def _read_py_function(filename, label):
    """Read ``filename`` as a grayscale image with OpenCV.

    Args:
        filename: path of the image, as ``str`` or ``bytes``.
        label: passed through unchanged.

    Returns:
        ``(image_decoded, label)`` where ``image_decoded`` is whatever
        ``cv2.imread`` returns for the path.
    """
    # The original read from an undefined `image_string`. Decode bytes so
    # OpenCV gets a plain string path.
    path = filename.decode() if isinstance(filename, bytes) else filename
    image_decoded = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    return image_decoded, label
# Use standard TensorFlow operations to resize the image to a fixed shape.
def _resize_function(image_decoded, label):
    """Declare the image as rank 3 and resize it to 28x28.

    Returns:
        ``(image_resized, label)``; the label is passed through untouched.
    """
    # All three dimensions are declared but left unknown.
    unknown_rank3 = [None, None, None]
    image_decoded.set_shape(unknown_rank3)
    target_size = [28, 28]
    return tf.image.resize_images(image_decoded, target_size), label
filenames = ["/var/data/image1.jpg", "/var/data/image2.jpg", ...]
labels = [0, 37, 29, 1, ...]
dataset =, labels))
dataset =
lambda filename, label: tuple(tf.py_func(
_read_py_function, [filename, label], [tf.uint8, label.dtype])))
dataset =
My question is: if we want _read_py_function() to output a Python dictionary, how do we set the output types? Is there a built-in data type such as tf.dict? For example:
def _read_py_function(filename):
    """Load an image/label pair and return it with its id and age.

    Args:
        filename: indexable with four entries, in order: image path, label
            path, image id, image age.

    Returns:
        Dict with the keys 'image', 'label', 'id' and 'age'.
    """
    image_filename = filename[0]
    label_filename = filename[1]
    image_id = filename[2]
    image_age = filename[3]
    image_decoded = cv2.imread(image_filename, cv2.IMREAD_GRAYSCALE)
    # The second read used the misspelled `label_fielname` and overwrote
    # `image_decoded`, leaving `label_decoded` undefined.
    label_decoded = cv2.imread(label_filename, cv2.IMREAD_GRAYSCALE)
    return {'image':image_decoded, 'label':label_decoded, 'id':image_id, 'age':image_age}
Then, how do we design the function?
Returning dicts inside the function called by should work as expected.
Here is an example:
dataset =
dataset = x: {'a': x, 'b': 2 * x})
dataset = y: y['a'] + y['b'])
res = dataset.make_one_shot_iterator().get_next()
with tf.Session() as sess:
for i in range(10):
assert == 3 * i
To add to the above answer this also works:
dataset =
dataset = x: {'a': x, 'b': 2 * x})
res = dataset.make_one_shot_iterator().get_next()
with tf.Session() as sess:
for i in range(10):
curr_res =
assert curr_res['a'] == i
assert curr_res['b'] == 2 * i

Strange values of training and testing when running my CNN in Tensorflow

I´ve been trying to train and evaluate a convolutional neural network using my own data, which consists in 200 training images and 20 testing images. My complete script is here:
Error while running a convolutional network using my own data in Tensorflow
When I run it, I don´t get any error and it seems to complete the whole process just fine, but the training values and testing result change randomly each time I run it, so I think that it´s not training anything at all.
When I print the values of image_train_batch_eval and label_train_batch_eval I get a tensor with 5 examples and 5 labels (as batch_size_train is 5) so I think that the batching process works fine.
I don´t really know what might be the problem, but there must be something I´m missing. Thank you in advance.
EDIT: These are the results I get.
Step 0, Traininig accuracy: 0.2
Step 2, Traininig accuracy: 0.4
Step 4, Traininig accuracy: 1
Step 6, Traininig accuracy: 1
Step 8, Traininig accuracy: 0.6
Step 10, Traininig accuracy: 0.8
Step 12, Traininig accuracy: 0.8
Step 14, Traininig accuracy: 0
Step 16, Traininig accuracy: 0.8
Step 18, Traininig accuracy: 0
Step 20, Traininig accuracy: 0.8
Step 22, Traininig accuracy: 0
Step 24, Traininig accuracy: 0
Step 26, Traininig accuracy: 0.2
Step 28, Traininig accuracy: 0.8
Step 30, Traininig accuracy: 0.4
Step 32, Traininig accuracy: 0
Step 34, Traininig accuracy: 1
Step 36, Traininig accuracy: 1
Step 38, Traininig accuracy: 0
Step 40, Traininig accuracy: 0.2
Step 42, Traininig accuracy: 0
Step 44, Traininig accuracy: 0.8
Step 46, Traininig accuracy: 0
Step 48, Traininig accuracy: 0.8
Testing accuracy: 0
But these values change everytime.
Since I can't follow your code, here is an example of a full conv-layer script using TensorFlow.
If you're working with images, it really does make sense to serialize your data; convolution operations are intense enough!
The following script serializes your images in TFRecords format [based on the Inception example].
Converts image data to TFRecords file format with Example protos.
The image data set is expected to reside in JPEG files located in the
following directory structure.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from datetime import datetime
import os
import random
import sys
import threading
import numpy as np
import tensorflow as tf'train_directory', '/tmp/',
'Training data directory')'validation_directory', '/tmp/',
'Validation data directory')'output_directory', '/tmp/',
'Output data directory')'train_shards', 2,
'Number of shards in training TFRecord files.')'validation_shards', 2,
'Number of shards in validation TFRecord files.')'num_threads', 2,
'Number of threads to preprocess the images.')
# The labels file contains a list of valid labels are held in this file.
# Assumes that the file contains entries as such:
# dog
# cat
# flower
# where each line corresponds to a label. We map each label contained in
# the file to an integer corresponding to the line number starting from 0.'labels_file', '', 'Labels file')
def _int64_feature(value):
    """Wrapper for inserting int64 features into Example proto."""
    # A bare value is wrapped in a list; an existing list is used as given.
    values = value if isinstance(value, list) else [value]
    int64_list = tf.train.Int64List(value=values)
    return tf.train.Feature(int64_list=int64_list)
def _bytes_feature(value):
    """Wrapper for inserting bytes features into Example proto."""
    # The value is always stored as a single-element bytes list.
    payload = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=payload)
def _convert_to_example(filename, image_buffer, label, text, height, width):
    """Build an Example proto for an example.

    Args:
        filename: string, path to an image file, e.g., '/path/to/example.JPG'
        image_buffer: string, JPEG encoding of RGB image
        label: integer, identifier for the ground truth for the network
        text: string, unique human-readable, e.g. 'dog'
        height: integer, image height in pixels
        width: integer, image width in pixels

    Returns:
        Example proto
    """
    colorspace = 'RGB'
    channels = 3
    image_format = 'JPEG'

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': _int64_feature(height),
        'image/width': _int64_feature(width),
        'image/colorspace': _bytes_feature(tf.compat.as_bytes(colorspace)),
        'image/channels': _int64_feature(channels),
        'image/class/label': _int64_feature(label),
        'image/class/text': _bytes_feature(tf.compat.as_bytes(text)),
        'image/format': _bytes_feature(tf.compat.as_bytes(image_format)),
        # Only the base name is stored, not the full path.
        'image/filename': _bytes_feature(tf.compat.as_bytes(os.path.basename(filename))),
        'image/encoded': _bytes_feature(tf.compat.as_bytes(image_buffer))}))
    return example
class ImageCoder(object):
    """Helper class that provides TensorFlow image coding utilities."""

    def __init__(self):
        # Create a single Session to run all image coding calls.
        self._sess = tf.Session()

        # Initializes function that converts PNG to JPEG data.
        self._png_data = tf.placeholder(dtype=tf.string)
        image = tf.image.decode_png(self._png_data, channels=3)
        self._png_to_jpeg = tf.image.encode_jpeg(image, format='rgb', quality=100)

        # Initializes function that decodes RGB JPEG data.
        self._decode_jpeg_data = tf.placeholder(dtype=tf.string)
        self._decode_jpeg = tf.image.decode_jpeg(self._decode_jpeg_data, channels=3)

    def png_to_jpeg(self, image_data):
        """Re-encode PNG bytes as JPEG bytes via the shared session."""
        return self._sess.run(self._png_to_jpeg,
                              feed_dict={self._png_data: image_data})

    def decode_jpeg(self, image_data):
        """Decode JPEG bytes into a 3-channel image array."""
        image = self._sess.run(self._decode_jpeg,
                               feed_dict={self._decode_jpeg_data: image_data})
        # The decoder was built with channels=3, so expect a rank-3 array
        # whose last axis has 3 entries.
        assert len(image.shape) == 3
        assert image.shape[2] == 3
        return image
def _is_png(filename):
"""Determine if a file contains a PNG format image.
filename: string, path of the image file.
boolean indicating if the image is a PNG.
return '.png' in filename
def _process_image(filename, coder):
    """Process a single image file.

    Args:
        filename: string, path to an image file e.g., '/path/to/example.JPG'.
        coder: instance of ImageCoder to provide TensorFlow image coding utils.

    Returns:
        image_buffer: string, JPEG encoding of RGB image.
        height: integer, image height in pixels.
        width: integer, image width in pixels.
    """
    # Read the image file.
    with tf.gfile.FastGFile(filename, 'rb') as f:
        image_data = f.read()

    # Convert any PNG to JPEG's for consistency.
    if _is_png(filename):
        print('Converting PNG to JPEG for %s' % filename)
        image_data = coder.png_to_jpeg(image_data)

    # Decode the RGB JPEG.
    image = coder.decode_jpeg(image_data)

    # Check that image converted to RGB
    assert len(image.shape) == 3
    height = image.shape[0]
    width = image.shape[1]
    assert image.shape[2] == 3

    # The encoded bytes are returned, not the decoded array; decoding is only
    # used to validate the image and read its dimensions.
    return image_data, height, width
def _process_image_files_batch(coder, thread_index, ranges, name, filenames,
                               texts, labels, num_shards):
    """Processes and saves list of images as TFRecord in 1 thread.

    Images that cannot be read or decoded are reported and skipped; they are
    not written to any shard and are not counted.

    Args:
      coder: instance of ImageCoder to provide TensorFlow image coding utils.
      thread_index: integer, unique batch to run index is within [0, len(ranges)).
      ranges: list of pairs of integers specifying ranges of each batches to
        analyze in parallel.
      name: string, unique identifier specifying the data set
      filenames: list of strings; each string is a path to an image file
      texts: list of strings; each string is human readable, e.g. 'dog'
      labels: list of integer; each integer identifies the ground truth
      num_shards: integer number of shards for this data set.
    """
    # Imported locally so the function does not depend on names that may be
    # missing from the file header.
    import sys
    from datetime import datetime

    # Each thread produces N shards where N = int(num_shards / num_threads).
    # For instance, if num_shards = 128, and the num_threads = 2, then the first
    # thread would produce shards [0, 64).
    num_threads = len(ranges)
    assert not num_shards % num_threads
    num_shards_per_batch = num_shards // num_threads

    # Split this thread's [start, stop) file range into equally sized shards.
    shard_ranges = np.linspace(ranges[thread_index][0],
                               ranges[thread_index][1],
                               num_shards_per_batch + 1).astype(int)
    num_files_in_thread = ranges[thread_index][1] - ranges[thread_index][0]

    counter = 0
    for s in range(num_shards_per_batch):
        # Generate a sharded version of the file name, e.g. 'train-00002-of-00010'
        shard = thread_index * num_shards_per_batch + s
        output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards)
        output_file = os.path.join(FLAGS.output_directory, output_filename)
        writer = tf.python_io.TFRecordWriter(output_file)

        shard_counter = 0
        try:
            for i in range(shard_ranges[s], shard_ranges[s + 1]):
                filename = filenames[i]
                label = labels[i]
                text = texts[i]

                try:
                    image_buffer, height, width = _process_image(filename, coder)
                except Exception as e:
                    # Deliberately best-effort: one unreadable image must not
                    # abort the whole conversion run.
                    print(e)
                    print('SKIPPED: Unexpected eror while decoding %s.' % filename)
                    continue

                example = _convert_to_example(filename, image_buffer, label,
                                              text, height, width)
                writer.write(example.SerializeToString())
                shard_counter += 1
                counter += 1

                if not counter % 1000:
                    print('%s [thread %d]: Processed %d of %d images in thread batch.' %
                          (datetime.now(), thread_index, counter, num_files_in_thread))
                    sys.stdout.flush()
        finally:
            # Always release the shard file, even if an unexpected error escapes.
            writer.close()

        print('%s [thread %d]: Wrote %d images to %s' %
              (datetime.now(), thread_index, shard_counter, output_file))
        sys.stdout.flush()

    # Report the number of shards (not the number of input files) written.
    print('%s [thread %d]: Wrote %d images to %d shards.' %
          (datetime.now(), thread_index, counter, num_shards_per_batch))
    sys.stdout.flush()
def _process_image_files(name, filenames, texts, labels, num_shards):
    """Process and save list of images as TFRecord of Example protos.

    The file list is split into FLAGS.num_threads contiguous ranges and one
    worker thread per range runs `_process_image_files_batch`.

    Args:
      name: string, unique identifier specifying the data set
      filenames: list of strings; each string is a path to an image file
      texts: list of strings; each string is human readable, e.g. 'dog'
      labels: list of integer; each integer identifies the ground truth
      num_shards: integer number of shards for this data set.
    """
    # Imported locally so the function does not depend on names that may be
    # missing from the file header.
    import sys
    from datetime import datetime

    assert len(filenames) == len(texts)
    assert len(filenames) == len(labels)

    # Break all images into batches with a [ranges[i][0], ranges[i][1]].
    spacing = np.linspace(0, len(filenames), FLAGS.num_threads + 1).astype(int)
    ranges = [[start, stop] for start, stop in zip(spacing[:-1], spacing[1:])]

    # Launch a thread for each batch.
    print('Launching %d threads for spacings: %s' % (FLAGS.num_threads, ranges))
    sys.stdout.flush()

    # Create a mechanism for monitoring when all threads are finished.
    coord = tf.train.Coordinator()

    # Create a generic TensorFlow-based utility for converting all image codings.
    coder = ImageCoder()

    threads = []
    for thread_index in range(len(ranges)):
        args = (coder, thread_index, ranges, name, filenames,
                texts, labels, num_shards)
        t = threading.Thread(target=_process_image_files_batch, args=args)
        t.start()
        threads.append(t)

    # Wait for all the threads to terminate.
    coord.join(threads)
    print('%s: Finished writing all %d images in data set.' %
          (datetime.now(), len(filenames)))
    sys.stdout.flush()
def _find_image_files(data_dir, labels_file):
    """Build a list of all images files and labels in the data set.

    Args:
      data_dir: string, path to the root directory of images.

        Assumes that the image data set resides in JPEG files located in
        one sub-directory per label, i.e. every file matching
        `<data_dir>/<label>/*` is treated as an image of class `<label>`.

      labels_file: string, path to the labels file.

        The list of valid labels are held in this file, one label per line.
        We map each label contained in the file to an integer; index 0 is
        left unused as a background class, so the label on the first line
        gets index 1.

    Returns:
      filenames: list of strings; each string is a path to an image file.
      texts: list of strings; each string is the class, e.g. 'dog'
      labels: list of integer; each integer identifies the ground truth.
    """
    # Imported locally so the function does not depend on names that may be
    # missing from the file header.
    import random

    print('Determining list of input files and labels from %s.' % data_dir)
    # Close the labels file deterministically instead of leaking the handle.
    with tf.gfile.FastGFile(labels_file, 'r') as f:
        unique_labels = [l.strip() for l in f.readlines()]

    labels = []
    filenames = []
    texts = []

    # Leave label index 0 empty as a background class.
    label_index = 1

    # Construct the list of JPEG files and labels.
    for text in unique_labels:
        jpeg_file_path = '%s/%s/*' % (data_dir, text)
        matching_files = tf.gfile.Glob(jpeg_file_path)

        labels.extend([label_index] * len(matching_files))
        texts.extend([text] * len(matching_files))
        filenames.extend(matching_files)

        if not label_index % 100:
            # Progress is measured in classes, so report against the number
            # of classes rather than the number of files collected so far.
            print('Finished finding files in %d of %d classes.' % (
                label_index, len(unique_labels)))
        label_index += 1

    # Shuffle the ordering of all image files in order to guarantee
    # random ordering of the images with respect to label in the
    # saved TFRecord files. Make the randomization repeatable.
    shuffled_index = list(range(len(filenames)))
    # A private, fixed-seed generator keeps the order reproducible without
    # reseeding the process-wide random module.
    random.Random(12345).shuffle(shuffled_index)

    filenames = [filenames[i] for i in shuffled_index]
    texts = [texts[i] for i in shuffled_index]
    labels = [labels[i] for i in shuffled_index]

    print('Found %d JPEG files across %d labels inside %s.' %
          (len(filenames), len(unique_labels), data_dir))
    return filenames, texts, labels
def _process_dataset(name, directory, num_shards, labels_file):
    """Process a complete data set and save it as a TFRecord.

    Args:
      name: string, unique identifier specifying the data set.
      directory: string, root path to the data set.
      num_shards: integer number of shards for this data set.
      labels_file: string, path to the labels file.
    """
    filenames, texts, labels = _find_image_files(directory, labels_file)
    _process_image_files(name, filenames, texts, labels, num_shards)
def main(unused_argv):
    """Convert the validation and the training image folders to TFRecords.

    Args:
      unused_argv: ignored by this function.
    """
    # Every thread must get a whole number of shards.
    assert not FLAGS.train_shards % FLAGS.num_threads, (
        'Please make the FLAGS.num_threads commensurate with FLAGS.train_shards')
    assert not FLAGS.validation_shards % FLAGS.num_threads, (
        'Please make the FLAGS.num_threads commensurate with '
        'FLAGS.validation_shards')
    print('Saving results to %s' % FLAGS.output_directory)

    # Run it!
    _process_dataset('validation', FLAGS.validation_directory,
                     FLAGS.validation_shards, FLAGS.labels_file)
    _process_dataset('train', FLAGS.train_directory,
                     FLAGS.train_shards, FLAGS.labels_file)
if __name__ == '__main__':
    # tf.app.run() parses the command-line flags into FLAGS and then calls
    # main() with the remaining arguments.
    tf.app.run()
You need to start the script as follows:
python <script_name>.py --train_directory=TrainingSet --output_directory=TF_Recordsfolder --validation_directory=ReferenceSet --labels_file=labels.txt --train_shards=1 --validation_shards=1 --num_threads=1
PS: You need a labels.txt file in which the labels are listed, one label per line.
After generating the serialized files for both the training and the test set, you can use the data in the following ConvNN script:
import tensorflow as tf
import sys
import numpy as np
import matplotlib.pyplot as plt
# Exclusive upper bounds for the random kernel sizes / channel counts that are
# drawn with np.random.randint(3, ...) further down in the script.
filter_max_dimension = 50
filter_max_depth = 30
# Kernel side length of the 1st and 2nd convolution layer (kernels are square).
filter_h_and_w = [3,3]
# Number of output channels of the 1st and 2nd convolution layer.
filter_depth = [3,3]
# Number of classes: depth of the one-hot labels and width of the output layer.
numberOFclasses = 21
# Directory prefix of the TensorBoard summary writers ('/train' and '/test'
# are appended).
TensorBoard = "TB_conv2NN"
# Directory that holds the TFRecord files read through getImage().
TF_Records = "TF_Recordsfolder"
# Learning rate handed to the Adam optimizer.
learning_rate = 1e-5
# Number of iterations of the training loop.
max_numberofiteretion =100000
# Batch size handed to tf.train.shuffle_batch.
batchSize = 21
# Image resolution used for the input placeholder and the reshape ops.
img_height = 128
img_width = 128
# 1st function to read images form TF_Record
def getImage(filename):
    """Build the ops that read and decode one example from a TFRecord file.

    Args:
      filename: string, path of the TFRecord file to read.

    Returns:
      A `(label, image)` pair of tensors, in that order. `label` is a one-hot
      vector of length `numberOFclasses`; `image` is the inverted grey-scale
      image flattened to `img_height * img_width` float32 values.
    """
    with tf.device('/cpu:0'):
        # convert filenames to a queue for an input pipeline.
        filenameQ = tf.train.string_input_producer([filename], num_epochs=None)

        # object to read records
        recordReader = tf.TFRecordReader()

        # read the full set of features for a single example
        key, fullExample = recordReader.read(filenameQ)

        # parse the full example into its' component features.
        features = tf.parse_single_example(
            fullExample,
            features={
                'image/height': tf.FixedLenFeature([], tf.int64),
                'image/width': tf.FixedLenFeature([], tf.int64),
                'image/colorspace': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
                'image/channels': tf.FixedLenFeature([], tf.int64),
                'image/class/label': tf.FixedLenFeature([], tf.int64),
                'image/class/text': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
                'image/format': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
                'image/filename': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
                'image/encoded': tf.FixedLenFeature([], dtype=tf.string, default_value='')
            })

        # now we are going to manipulate the label and image features
        label = features['image/class/label']
        image_buffer = features['image/encoded']

        # Decode the jpeg
        with tf.name_scope('decode_img', values=[image_buffer]):
            # decode
            image = tf.image.decode_jpeg(image_buffer, channels=3)

            # and convert to single precision data type
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)

        # cast image into a single array, where each element corresponds to the greyscale
        # value of a single pixel.
        # the "1-.." part inverts the image, so that the background is black.
        # NOTE(review): the reshape requires every stored image to be exactly
        # img_height x img_width pixels - confirm against the data set.
        image = tf.reshape(1 - tf.image.rgb_to_grayscale(image),
                           [img_height * img_width])

        # re-define label as a "one-hot" vector
        # (stored labels start at 1, hence the "label-1").
        # This approach can easily be extended to more classes.
        label = tf.stack(tf.one_hot(label - 1, numberOFclasses))

        return label, image
with tf.device('/cpu:0'):
    # NOTE: getImage() returns (label, image), so the names below are swapped
    # with respect to their content: the "*_img" tensors hold labels and the
    # "*_label" tensors hold images. train() compensates for this when it
    # fetches the batches, so the names are kept as they are.
    train_img, train_label = getImage(TF_Records + "/train-00000-of-00001")
    validation_img, validation_label = getImage(TF_Records + "/validation-00000-of-00001")

    # associate the "label_batch" and "image_batch" objects with a randomly selected batch---
    # of labels and images respectively
    train_imageBatch, train_labelBatch = tf.train.shuffle_batch(
        [train_img, train_label],
        batch_size=batchSize, capacity=50, min_after_dequeue=10)

    # and similarly for the validation data
    validation_imageBatch, validation_labelBatch = tf.train.shuffle_batch(
        [validation_img, validation_label],
        batch_size=batchSize, capacity=50, min_after_dequeue=10)
def train():
    """Build the two-layer convolutional network, then train and evaluate it.

    The network is conv -> max-pool -> conv -> fully connected -> dropout ->
    output layer. Batches come from the module-level shuffle-batch queues.
    Every 10th step evaluates a validation batch and logs it to
    `TensorBoard + '/test'`; all other steps run one optimizer step and log to
    `TensorBoard + '/train'`. Every step with `i % 100 == 99` additionally
    records a full execution trace.
    """
    with tf.device('/gpu:0'):
        config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
        #config.gpu_options.allow_growth = True
        sess = tf.InteractiveSession(config=config)

        #defining tensorflow graph :
        with tf.name_scope("input"):
            x = tf.placeholder(tf.float32, [None, img_width * img_height], name="pixels_values")
            y_ = tf.placeholder(tf.float32, [None, numberOFclasses], name='Prediction')
        with tf.name_scope("input_reshape"):
            image_shaped = tf.reshape(x, [-1, img_height, img_width, 1])

        #defining weigths and biases:
        def weights_variable(shape):
            return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

        def bias_variable(shape):
            return tf.Variable(tf.constant(0.1, shape=shape))

        #help function to generates summaries for given variables
        def variable_summaries(var):
            with tf.name_scope('summaries'):
                mean = tf.reduce_mean(var)
                tf.summary.scalar('mean', mean)
                with tf.name_scope('stddev'):
                    stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
                tf.summary.scalar('stddev', stddev)
                tf.summary.scalar('max', tf.reduce_max(var))
                tf.summary.scalar('min', tf.reduce_min(var))
                tf.summary.histogram('histogram', var)

        def conv2d(x, W):
            return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

        def max_pool_2x2(x):
            return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

        with tf.name_scope('1st_conv_layer'):
            W_conv1 = weights_variable([filter_h_and_w[0], filter_h_and_w[0], 1, filter_depth[0]])
            b_conv1 = bias_variable([filter_depth[0]])
            # Use the (height, width) reshape built above so the layout
            # matches the way the images were flattened.
            h_conv1 = tf.nn.relu(conv2d(image_shaped, W_conv1) + b_conv1)
        with tf.name_scope('1nd_Pooling_layer'):
            h_conv1 = max_pool_2x2(h_conv1)
        with tf.name_scope('2nd_conv_layer'):
            W_conv2 = weights_variable([filter_h_and_w[1], filter_h_and_w[1], filter_depth[0], filter_depth[1]])
            b_conv2 = bias_variable([filter_depth[1]])
            h_conv2 = tf.nn.relu(conv2d(h_conv1, W_conv2) + b_conv2)
        with tf.name_scope('1st_Full_connected_Layer'):
            # Derive the flattened size from the static shape of the conv
            # output. A hard-coded size that does not match would make the
            # "-1" in the reshape absorb the difference and silently change
            # the batch dimension.
            flat_dim = int(np.prod(h_conv2.get_shape().as_list()[1:]))
            W_fc1 = weights_variable([flat_dim, 1024])
            b_fc1 = bias_variable([1024])
            h_pool_flat = tf.reshape(h_conv2, [-1, flat_dim])
            h_fc1 = tf.nn.relu(tf.matmul(h_pool_flat, W_fc1) + b_fc1)
        with tf.name_scope('Dropout'):
            keep_prob = tf.placeholder("float")
            h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
        with tf.name_scope('Output_layer'):
            W_fc3 = weights_variable([1024, numberOFclasses])
            b_fc3 = bias_variable([numberOFclasses])
            logits = tf.matmul(h_fc1_drop, W_fc3) + b_fc3
            y_conv = tf.nn.softmax(logits)
        with tf.name_scope('cross_entropy'):
            # softmax_cross_entropy_with_logits applies the softmax itself,
            # so it must receive the raw logits, not the softmax output.
            diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits)
            with tf.name_scope('total'):
                cross_entropy = tf.reduce_mean(diff)
        tf.summary.scalar('cross_entropy', cross_entropy)
        with tf.name_scope('train'):
            train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)
        with tf.name_scope('accuracy'):
            with tf.name_scope('correct_prediction'):
                correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
            with tf.name_scope('accuracy'):
                accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar('accuracy', accuracy)

        # Merging Summaries
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(TensorBoard + '/train', sess.graph)
        test_writer = tf.summary.FileWriter(TensorBoard + '/test')

        # initialize the variables
        sess.run(tf.global_variables_initializer())

        # start the threads used for reading files
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # feeding function
        def feed_dict(train):
            """Fetch one batch: training data if `train`, else validation data."""
            # The module-level pipeline binds getImage()'s (label, image)
            # result to swapped names, so the "*_labelBatch" tensors hold the
            # images and the "*_imageBatch" tensors hold the labels.
            if train:
                img_batch, labels_batch = sess.run([train_labelBatch, train_imageBatch])
                dropoutValue = 0.7
            else:
                img_batch, labels_batch = sess.run([validation_labelBatch, validation_imageBatch])
                # No dropout while evaluating.
                dropoutValue = 1
            return {x: img_batch, y_: labels_batch, keep_prob: dropoutValue}

        try:
            for i in range(max_numberofiteretion):
                if i % 10 == 0:  # Run a Test
                    summary, acc = sess.run([merged, accuracy], feed_dict=feed_dict(False))
                    test_writer.add_summary(summary, i)  # Save to TensorBoard
                else:  # Training
                    if i % 100 == 99:  # Record execution stats
                        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                        run_metadata = tf.RunMetadata()
                        summary, _ = sess.run([merged, train_step],
                                              feed_dict=feed_dict(True),
                                              options=run_options,
                                              run_metadata=run_metadata)
                        train_writer.add_run_metadata(run_metadata, 'step%03d' % i)
                        train_writer.add_summary(summary, i)
                    else:  # Record a summary
                        summary, _ = sess.run([merged, train_step], feed_dict=feed_dict(True))
                        train_writer.add_summary(summary, i)
        finally:
            # finalise: stop the reader threads and release writers/session.
            coord.request_stop()
            coord.join(threads)
            train_writer.close()
            test_writer.close()
            sess.close()
# Draw a random architecture: kernel side lengths in [3, filter_max_dimension)
# and channel counts in [3, filter_max_depth) for the two convolution layers.
# NOTE(review): if the original post wrapped these statements in a loop to try
# several architectures, that loop is not visible here - confirm.
filter_h_and_w[0] = np.random.randint(3, filter_max_dimension)
filter_h_and_w[1] = np.random.randint(3, filter_max_dimension)
filter_depth[0] = np.random.randint(3, filter_max_depth)
filter_depth[1] = np.random.randint(3, filter_max_depth)

# Encode the drawn architecture in the TensorBoard log directory name.
TensorBoard = "ConV2NN/_filter{}To{}D{}To{}R10e5".format(
    filter_h_and_w[0], filter_h_and_w[1], filter_depth[0], filter_depth[1])

with tf.device('/gpu:0'):
    train()
The script uses both the GPU and the CPU; if you don't have a GPU, TensorFlow will fall back to the CPU of your device. The code is largely self-explanatory: you need to change the image resolution values and the number of classes. The script saves a test and a train folder for TensorBoard; to inspect them, start TensorBoard and open it in your browser.
Since you have only 2 classes, I think two conv layers are enough; if you think you need more, it is pretty easy to add some.
I hope this will help