ValueError: Cannot feed value of shape (3375, 50, 50, 2) for Tensor 'Reshape:0', which has shape '(?, 5000)' - tensorflow

I am learning Tensorflow. Following is my code for an MLP with TensorFlow. I have some issues with mismatched data dimensions.
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
wholedataset = np.load('C:/Users/pourya/Downloads/WholeTrueData.npz')
data = wholedataset['wholedata'].astype('float32')
label = wholedataset['wholelabel'].astype('float32')
height = wholedataset['wholeheight'].astype('float32')
print(type(data[20,1,1,0]))
learning_rate = 0.001
training_iters = 5
display_step = 20
n_input = 3375
X = tf.placeholder("float32")
Y = tf.placeholder("float32")
weights = {
    'wc1': tf.Variable(tf.random_normal([3, 3, 2, 1])),
    'wd1': tf.Variable(tf.random_normal([3, 3, 1, 1]))
}
biases = {
    'bc1': tf.Variable(tf.random_normal([1])),
    'out': tf.Variable(tf.random_normal([1, 50, 50, 1]))
}
mnist= data
n_nodes_hl1 = 500
n_nodes_hl2 = 500
n_nodes_hl3 = 500
n_classes = 2
batch_size = 100
x = tf.placeholder('float', shape = [None,50,50,2])
shape = x.get_shape().as_list()
dim = np.prod(shape[1:])
x_reshaped = tf.reshape(x, [-1, dim])
y = tf.placeholder('float', shape= [None,50,50,2])
shape = y.get_shape().as_list()
dim = np.prod(shape[1:])
y_reshaped = tf.reshape(y, [-1, dim])
def neural_network_model(data):
    hidden_1_layer = {'weights': tf.Variable(tf.random_normal([5000, n_nodes_hl1])),
                      'biases': tf.Variable(tf.random_normal([n_nodes_hl1]))}
    hidden_2_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])),
                      'biases': tf.Variable(tf.random_normal([n_nodes_hl2]))}
    hidden_3_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl2, n_nodes_hl3])),
                      'biases': tf.Variable(tf.random_normal([n_nodes_hl3]))}
    output_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl3, n_classes])),
                    'biases': tf.Variable(tf.random_normal([n_classes]))}
    l1 = tf.add(tf.matmul(data, hidden_1_layer['weights']), hidden_1_layer['biases'])
    l1 = tf.nn.relu(l1)
    l2 = tf.add(tf.matmul(l1, hidden_2_layer['weights']), hidden_2_layer['biases'])
    l2 = tf.nn.relu(l2)
    l3 = tf.add(tf.matmul(l2, hidden_3_layer['weights']), hidden_3_layer['biases'])
    l3 = tf.nn.relu(l3)
    output = tf.matmul(l3, output_layer['weights']) + output_layer['biases']
    return output
def train_neural_network(x):
    prediction = neural_network_model(x)
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    hm_epochs = 10
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(hm_epochs):
            epoch_loss = 0
            for _ in range(int(n_input/batch_size)):
                epoch_x = wholedataset['wholedata'].astype('float32')
                epoch_y = wholedataset['wholedata'].astype('float32')
                _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
                epoch_loss += c
            print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:', epoch_loss)
        correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        print('Accuracy:', accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))
train_neural_network(x)
I got the following error:
ValueError: Cannot feed value of shape (3375, 50, 50, 2) for Tensor 'Reshape:0', which has shape '(?, 5000)'
Does anyone know what the issue with my code is, and how I can fix it?
The shape of the data is (3375, 50, 50, 2).
Thank you for anyone's input!

I think the problem is that you use the same variable name x for both the placeholder and the reshape, in the lines
x = tf.placeholder('float', shape = [None,50,50,2])
and
x = tf.reshape(x, [-1, dim])
so that when you
feed_dict={x: your_val}
you are feeding the output of the reshape operation.
You should have different names, for instance
x_placeholder = tf.placeholder('float', shape = [None,50,50,2])
x_reshaped = tf.reshape(x_placeholder, [-1, dim])
and then
feed_dict={x_placeholder: your_val}
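To make the fix concrete, here is a minimal sketch (the shapes are taken from the question; the zeros array is just a stand-in for your_val):
import numpy as np
import tensorflow as tf

# the placeholder keeps the 4-D input shape; the reshaped tensor gets its own name
x_placeholder = tf.placeholder('float', shape=[None, 50, 50, 2])
dim = np.prod(x_placeholder.get_shape().as_list()[1:])  # 50*50*2 = 5000
x_reshaped = tf.reshape(x_placeholder, [-1, dim])

with tf.Session() as sess:
    your_val = np.zeros((3375, 50, 50, 2), dtype=np.float32)  # stand-in for the real data
    # feed the placeholder, not the reshape; the reshape is computed from it
    flat = sess.run(x_reshaped, feed_dict={x_placeholder: your_val})
    print(flat.shape)  # (3375, 5000)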

Related

Trouble Training Same Tensorflow Model in PyTorch

I have trained a model in Tensorflow and am having trouble replicating it in PyTorch. The Tensorflow model achieves near 100% accuracy (the task is simple), but the PyTorch model performs at random. I've spent a while trying to figure this out, and can't understand what the problem could be.
The model is trained for the task of binary classification. Given an input utterance describing a quadrant and an (x, y, z) coordinate, the model has to predict whether the (x, z) portion of the coordinate is in the quadrant described. For example, if the input text was "quadrant 1" and the coordinate was (0.5, -, 0.5), then the prediction should be true, but if the region was "quadrant 2" with the same coordinate, then the prediction should be false.
I generated some data and trained the model in Tensorflow using this code:
x_data_placeholder = tf.placeholder(tf.float32, [FLAGS.batch_size, 1], name="x_data")
y_data_placeholder = tf.placeholder(tf.float32, [FLAGS.batch_size, 1], name="y_data")
z_data_placeholder = tf.placeholder(tf.float32, [FLAGS.batch_size, 1], name="z_data")
# text and labels placeholders
text_data = tf.placeholder(tf.int32, [FLAGS.batch_size, maxtextlength])
text_lengths = tf.placeholder(tf.int32, [FLAGS.batch_size])
y_labels_placeholder = tf.placeholder(tf.int64, [FLAGS.batch_size])
# encode text and coordinate
embeddings = tf.Variable(tf.random_uniform([100, embedding_size], -1, -1))
rnn_inputs = tf.nn.embedding_lookup(embeddings, text_data)
rnn_layers = [tf.compat.v1.nn.rnn_cell.LSTMCell(size, initializer=tf.compat.v1.keras.initializers.glorot_normal) for size in [256]]
multi_rnn_cell = tf.compat.v1.nn.rnn_cell.MultiRNNCell(rnn_layers, state_is_tuple=True)
text_outputs, text_fstate = tf.compat.v1.nn.dynamic_rnn(cell=multi_rnn_cell,
                                                        inputs=rnn_inputs,
                                                        dtype=tf.float32,
                                                        sequence_length=text_lengths)
# fully connected layers to map the input coordinates into the same dimension as the LSTM output layer above
x_output_layer = tf.compat.v1.layers.dense(x_data_placeholder, units=FLAGS.fc_column_size, activation=tf.nn.relu, name='x_coordinate')
y_output_layer = tf.compat.v1.layers.dense(y_data_placeholder, units=FLAGS.fc_column_size, activation=tf.nn.relu, name='y_coordinate')
z_output_layer = tf.compat.v1.layers.dense(z_data_placeholder, units=FLAGS.fc_column_size, activation=tf.nn.relu, name='z_coordinate')
# add the representations
total_output_layer = x_output_layer + y_output_layer + z_output_layer + lstm_output_layer
# make the predictions with two fully connected layers
fc_1 = tf.compat.v1.layers.dense(total_output_layer, units=FLAGS.hidden_layer_size, activation=tf.nn.relu, name='fc_1')
logits = tf.compat.v1.layers.dense(fc_1, units=FLAGS.output_dims, activation=None, name='logits')
# train the model
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_labels_placeholder, logits=logits))
optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate, epsilon=1e-7)
gradients, variables = zip(*optimizer.compute_gradients(loss))
gradients, _ = tf.clip_by_global_norm(gradients, FLAGS.gradient_clip_threshold)
optimize = optimizer.apply_gradients(zip(gradients, variables))
# then it'll be trained with sess.run ...
Now for the PyTorch replication:
class BaselineModel(nn.Module):
    def __init__(self):
        super(BaselineModel, self).__init__()
        self.encode_x = nn.Linear(1, embed_size)
        self.encode_y = nn.Linear(1, embed_size)
        self.encode_z = nn.Linear(1, embed_size)
        self._embeddings = nn.Embedding(vocab_size, self.embedding_table_size)
        nn.init.uniform_(self._embeddings.weight, -1.0, 1.0)
        self.num_layers = 1
        self.rnn = nn.LSTM(self.embedding_table_size, self.hidden_size, batch_first=True)
        self.fc_after_text_lstm = nn.Linear(self.hidden_size, 100)
        self.fc = nn.Linear(100, 256)
        self.fc_final = nn.Linear(256, 2)
        self.relu_activation = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def init_hidden(self, batch_size, device='cuda:0'):
        # for LSTM, we need # of layers
        h_0 = torch.zeros(1, batch_size, self.hidden_size).to(device)
        c_0 = torch.zeros(1, batch_size, self.hidden_size).to(device)
        return h_0, c_0

    def forward(self, input_text, x_coordinate=None, y_coordinate=None, z_coordinate=None):
        x_embed = self.relu_activation(self.encode_x(x_coordinate.cuda().to(torch.float32)).cuda())
        y_embed = self.relu_activation(self.encode_y(y_coordinate.cuda().to(torch.float32))).cuda()
        z_embed = self.relu_activation(self.encode_z(z_coordinate.cuda().to(torch.float32))).cuda()
        embeds = self._embeddings(input_text)
        embedding, hidden = self.rnn(embeds, self.hidden)
        text_fc = self.relu_activation(self.fc_after_text_lstm(embedding[:, -1]))
        representations_so_far_added = torch.sum(torch.stack([text_fc, x_embed, y_embed, z_embed]), dim=0)
        pre_final_embedding = self.relu_activation(self.fc(representations_so_far_added))
        return self.fc_final(pre_final_embedding)
### training code
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, eps=1e-7)
criterion = nn.CrossEntropyLoss()
for input_text, x_coordinate, y_coordinate, z_coordinate, targets in train_data:
    optimizer.zero_grad()
    pred = model(input_text, x_coordinate=x_coordinate, y_coordinate=y_coordinate, z_coordinate=z_coordinate)
    loss = criterion(pred.float(), targets)
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
    optimizer.step()
    scheduler.step()
# accuracy evaluation code, this is evaluated over the entire epoch
pred_idx = F.log_softmax(pred, dim=1)
target_labels = targets.cpu().int()
pred_labels = torch.argmax(pred_idx, dim=-1).cpu().data.int()
curr_acc = skm.accuracy_score(target_labels, pred_labels)
If anyone can spot any issue with the PyTorch implementation or maybe tell me what could be wrong, that would be much appreciated! I also tried to load the weights of the Tensorflow model into all the appropriate layers, and performance still struggles in PyTorch! Thanks in advance!
EDIT:
I have created a minimally reproducible example, because I still cannot figure out what the problem is. Any help would still be appreciated!
import torch
import torch.nn as nn
import numpy as np
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
lr = 0.0005
n_epochs = 10
input_dim = 4
hidden_dim = 128
layer_dim = 2
output_dim = 2
batch_size = 50
class FeatureDataSet(torch.utils.data.Dataset):
    def __init__(self, x_train, y_train, x_coordinates):
        self.x_train = torch.tensor(x_train, dtype=torch.long)
        self.y_train = torch.tensor(y_train)
        self.x_coordinates = torch.tensor(x_coordinates, dtype=torch.float32)

    def __len__(self):
        return len(self.y_train)

    def __getitem__(self, idx):
        return self.x_train[idx], self.y_train[idx], self.x_coordinates[idx]
class RNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, batch_size):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        # linear layer to encode the coordinate
        self.encode_x = nn.Linear(1, hidden_dim).cuda()
        self._embeddings = nn.Embedding(40, 100).cuda()
        # hidden_dim is 128
        # layer_dim is 2
        self.lstm = nn.LSTM(100, hidden_dim, layer_dim, batch_first=True).cuda()
        self.fc = nn.Linear(2 * hidden_dim, output_dim).cuda()
        self.batch_size = batch_size
        self.hidden = None

    def init_hidden(self, x):
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
        return [t.cpu() for t in (h0, c0)]

    def forward(self, x, x_coordinate):
        # initializing the hidden states
        h0, c0 = self.init_hidden(x)
        embeds = self._embeddings(x)
        out, (hn, cn) = self.lstm(embeds.cuda(), (h0.cuda(), c0.cuda()))
        x_embed = F.relu(self.encode_x(x_coordinate.cuda().to(torch.float32)).cuda())
        representations_so_far_added = torch.cat([out[:, -1, :], x_embed], dim=1)
        out = self.fc(representations_so_far_added)
        return out
model = RNN(input_dim, hidden_dim, layer_dim, output_dim, batch_size)
criterion = nn.CrossEntropyLoss()
opt = torch.optim.Adam(model.parameters(), lr=0.001)
print('Start model training')
import sklearn.metrics as skm
import torch.nn.functional as F
x_train = []
x_coordinates = []
y_train = []
for i in range(10000):
    # create the data: if x_coordinate > 0 and the sentence says so (represented by [1, 5, 6, 8]), we should predict positive, else negative
    # the same applies if x_coordinate < 0, just that the sentence is now [1, 5, 6, 9]
    if np.random.randint(0, 2) == 0:
        if np.random.randint(0, 2) == 0:
            # x coordinate > 0
            x_train.append([1, 5, 6, 8])
            x_coordinates.append([round(np.random.uniform(0.01, 1.00, 1)[0], 2)])
            y_train.append(1.0)
        else:
            # x coordinate > 0, negative example
            x_train.append([1, 5, 6, 8])
            x_coordinates.append([round(np.random.uniform(-1.00, 0.00, 1)[0], 2)])
            y_train.append(0.0)
    else:
        if np.random.randint(0, 2) == 0:
            # x coordinate < 0
            x_train.append([1, 5, 6, 9])
            x_coordinates.append([round(np.random.uniform(-1.00, 0.00, 1)[0], 2)])
            y_train.append(1.0)
        else:
            # x coordinate < 0, negative example
            x_train.append([1, 5, 6, 9])
            x_coordinates.append([round(np.random.uniform(0.01, 1.00, 1)[0], 2)])
            y_train.append(0.0)
# print a sample of data
print(x_train[:10])
print(y_train[:10])
print(x_coordinates[:10])
# create a dataloader
trainingDataset = FeatureDataSet(x_train=x_train, y_train=y_train, x_coordinates=x_coordinates)
train_loader = torch.utils.data.DataLoader(dataset=trainingDataset, batch_size=batch_size, shuffle=True)
# for each epoch
for epoch in range(1, n_epochs + 1):
    acc_all = []
    # each batch
    for i, (x_batch, y_batch, x_coord_batch) in enumerate(train_loader):
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        x_coord_batch = x_coord_batch.to(device)
        opt.zero_grad()
        # pass in the text (x_batch) and coordinate (x_coord_batch)
        out = model(x_batch, x_coordinate=x_coord_batch)
        loss = criterion(out.float(), y_batch.type(torch.LongTensor).cuda())
        loss.backward()
        opt.step()
        pred_idx = F.log_softmax(out, dim=1)
        target_labels = y_batch.cpu().int()
        pred_labels = torch.argmax(pred_idx, dim=-1).cpu().data.int()
        curr_acc = skm.accuracy_score(target_labels, pred_labels)
        acc_all.append(curr_acc)
    print(np.mean(acc_all))
I suspect there are some mistakes in the dataset implementation of your PyTorch version.
I tried your PyTorch BaselineModel on both the dataset in your "minimally reproducible example" and my own dataset constructed according to your description, and found that it works fine.
The following is my code for testing on my own dataset. Note that I added several hyperparameters to the BaselineModel code to make it run. I got accuracy over 99%.
import random
import torch
import torch.nn as nn
import numpy as np
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
lr = 0.0005
n_epochs = 100
input_dim = 4
hidden_dim = 128
layer_dim = 2
output_dim = 2
batch_size = 50
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
class FeatureDataSet(torch.utils.data.Dataset):
    def __init__(self, x_train, y_train, x_coordinates, y_coordinates, z_coordinates):
        self.x_train = torch.tensor(x_train, dtype=torch.long)
        self.y_train = torch.tensor(y_train)
        self.x_coordinates = torch.tensor(x_coordinates, dtype=torch.float32)
        self.y_coordinates = torch.tensor(y_coordinates, dtype=torch.float32)
        self.z_coordinates = torch.tensor(z_coordinates, dtype=torch.float32)

    def __len__(self):
        return len(self.y_train)

    def __getitem__(self, idx):
        return self.x_train[idx], self.y_train[idx], self.x_coordinates[idx], self.y_coordinates[idx], self.z_coordinates[idx]
class BaselineModel(nn.Module):
    def __init__(self):
        super(BaselineModel, self).__init__()
        vocab_size = 40
        self.hidden_size = 100
        self.embedding_table_size = self.hidden_size
        self.encode_x = nn.Linear(1, self.hidden_size)
        self.encode_y = nn.Linear(1, self.hidden_size)
        self.encode_z = nn.Linear(1, self.hidden_size)
        self._embeddings = nn.Embedding(vocab_size, self.embedding_table_size)
        nn.init.uniform_(self._embeddings.weight, -1.0, 1.0)
        self.num_layers = 1
        self.rnn = nn.LSTM(self.embedding_table_size, self.hidden_size, batch_first=True)
        self.fc_after_text_lstm = nn.Linear(self.hidden_size, 100)
        self.fc = nn.Linear(100, 256)
        self.fc_final = nn.Linear(256, 2)
        self.relu_activation = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)
        self.hidden = self.init_hidden(batch_size)

    def init_hidden(self, batch_size, device='cuda:0'):
        # for LSTM, we need # of layers
        h_0 = torch.zeros(1, batch_size, self.hidden_size).to(device)
        c_0 = torch.zeros(1, batch_size, self.hidden_size).to(device)
        return h_0, c_0

    def forward(self, input_text, x_coordinate=None, y_coordinate=None, z_coordinate=None):
        x_embed = self.relu_activation(self.encode_x(x_coordinate.cuda().to(torch.float32)).cuda())
        y_embed = self.relu_activation(self.encode_y(y_coordinate.cuda().to(torch.float32))).cuda()
        z_embed = self.relu_activation(self.encode_z(z_coordinate.cuda().to(torch.float32))).cuda()
        embeds = self._embeddings(input_text)
        embedding, hidden = self.rnn(embeds, self.hidden)
        text_fc = self.relu_activation(self.fc_after_text_lstm(embedding[:, -1]))
        representations_so_far_added = torch.sum(torch.stack([text_fc, x_embed, y_embed, z_embed]), dim=0)
        pre_final_embedding = self.relu_activation(self.fc(representations_so_far_added))
        return self.fc_final(pre_final_embedding)
# model = RNN(input_dim, hidden_dim, layer_dim, output_dim, batch_size)
model = BaselineModel().cuda()
criterion = nn.CrossEntropyLoss()
opt = torch.optim.Adam(model.parameters(), lr=0.001)
print('Start model training')
import sklearn.metrics as skm
import torch.nn.functional as F
x_train = []
x_coordinates = []
y_coordinates = []
z_coordinates = []
y_train = []
for i in range(10000):
    x_coordinate = round(np.random.uniform(-1, 1.00, 1)[0], 2)
    y_coordinate = round(np.random.uniform(-1, 1.00, 1)[0], 2)
    z_coordinate = round(np.random.uniform(-1, 1.00, 1)[0], 2)
    x_coordinates.append([x_coordinate])
    y_coordinates.append([y_coordinate])
    z_coordinates.append([z_coordinate])
    if np.random.randint(0, 2) == 0:  # positive example
        if x_coordinate <= 0 and z_coordinate <= 0:
            x_train.append([1, 5, 6, 8])
        elif x_coordinate <= 0 and z_coordinate > 0:
            x_train.append([1, 5, 6, 9])
        elif x_coordinate > 0 and z_coordinate <= 0:
            x_train.append([1, 5, 6, 10])
        elif x_coordinate > 0 and z_coordinate > 0:
            x_train.append([1, 5, 6, 11])
        y_train.append(1.0)
    else:
        if x_coordinate <= 0 and z_coordinate <= 0:
            x_train.append(random.choice([[1, 5, 6, 9], [1, 5, 6, 10], [1, 5, 6, 11]]))
        elif x_coordinate <= 0 and z_coordinate > 0:
            x_train.append(random.choice([[1, 5, 6, 8], [1, 5, 6, 10], [1, 5, 6, 11]]))
        elif x_coordinate > 0 and z_coordinate <= 0:
            x_train.append(random.choice([[1, 5, 6, 8], [1, 5, 6, 9], [1, 5, 6, 11]]))
        elif x_coordinate > 0 and z_coordinate > 0:
            x_train.append(random.choice([[1, 5, 6, 8], [1, 5, 6, 9], [1, 5, 6, 10]]))
        y_train.append(0.0)
# print a sample of data
print(x_train[:10])
print(y_train[:10])
print(x_coordinates[:10])
print(y_coordinates[:10])
print(z_coordinates[:10])
# create a dataloader
trainingDataset = FeatureDataSet(x_train=x_train, y_train=y_train, x_coordinates=x_coordinates, y_coordinates=y_coordinates, z_coordinates=z_coordinates)
train_loader = torch.utils.data.DataLoader(dataset=trainingDataset, batch_size=batch_size, shuffle=True)
# for each epoch
loss_meter = AverageMeter()
for epoch in range(1, n_epochs + 1):
    acc_all = []
    # each batch
    loss_meter.reset()
    for i, (x_batch, y_batch, x_coord_batch, y_coord_batch, z_coord_batch) in enumerate(train_loader):
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        x_coord_batch = x_coord_batch.to(device)
        y_coord_batch = y_coord_batch.to(device)
        z_coord_batch = z_coord_batch.to(device)
        opt.zero_grad()
        # pass in the text (x_batch) and coordinates
        out = model(x_batch, x_coordinate=x_coord_batch, y_coordinate=y_coord_batch, z_coordinate=z_coord_batch)
        loss = criterion(out.float(), y_batch.type(torch.LongTensor).cuda())
        loss.backward()
        opt.step()
        pred_idx = F.log_softmax(out, dim=1)
        target_labels = y_batch.cpu().int()
        pred_labels = torch.argmax(pred_idx, dim=-1).cpu().data.int()
        curr_acc = skm.accuracy_score(target_labels, pred_labels)
        acc_all.append(curr_acc)
        loss_meter.update(loss.item())
    print(np.mean(acc_all))
    print("loss is %f" % loss_meter.val)
As for the "minimally reproducible example", I think it is quite reasonable that the RNN model doesn't work, as I stated in the comments. I suppose TensorFlow could not fit it either, although I have not tried. Your "minimally reproducible example" may be unrelated to your main problem.

ValueError: Cannot feed value of shape (128, 28, 28) for Tensor 'Placeholder:0', which has shape '(?, 784)'

I am new to Tensorflow and Machine Learning and am trying out a CNN using Tensorflow with my custom input data, but I am getting the error attached below.
The data (image) size is 28x28, with 15 labels.
I don't understand the numpy reshape part of this script, or the error.
Help is highly appreciated.
import tensorflow as tf
import os
import skimage.data
import numpy as np
import random
def load_data(data_directory):
    directories = [d for d in os.listdir(data_directory)
                   if os.path.isdir(os.path.join(data_directory, d))]
    labels = []
    images = []
    for d in directories:
        label_directory = os.path.join(data_directory, d)
        file_names = [os.path.join(label_directory, f)
                      for f in os.listdir(label_directory)
                      if f.endswith(".jpg")]
        for f in file_names:
            images.append(skimage.data.imread(f))
            labels.append(d)
        print(str(d)+' Completed')
    return images, labels
ROOT_PATH = "H:\Testing\TrainingData"
train_data_directory = os.path.join(ROOT_PATH, "Training")
test_data_directory = os.path.join(ROOT_PATH, "Testing")
print('Loading Data...')
images, labels = load_data(train_data_directory)
print('Data has been Loaded')
n_classes = 15
training_examples = 10500
test_examples = 4500
batch_size = 128
x = tf.placeholder('float', [None, 784])
y = tf.placeholder('float')
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME')

def maxpool2d(x):
    return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
def neural_network_model(x):
    weights = {'W_Conv1': tf.Variable(tf.random_normal([5,5,1,32])),
               'W_Conv2': tf.Variable(tf.random_normal([5,5,32,64])),
               'W_FC': tf.Variable(tf.random_normal([7*7*64, 1024])),
               'Output': tf.Variable(tf.random_normal([1024, n_classes]))}
    biases = {'B_Conv1': tf.Variable(tf.random_normal([32])),
              'B_Conv2': tf.Variable(tf.random_normal([64])),
              'B_FC': tf.Variable(tf.random_normal([1024])),
              'Output': tf.Variable(tf.random_normal([n_classes]))}
    x = tf.reshape(x, shape=[-1,28,28,1])
    conv1 = conv2d(x, weights['W_Conv1'])
    conv1 = maxpool2d(conv1)
    conv2 = conv2d(conv1, weights['W_Conv2'])
    conv2 = maxpool2d(conv2)
    fc = tf.reshape(conv2, [-1, 7*7*64])
    fc = tf.nn.relu(tf.matmul(fc, weights['W_FC']) + biases['B_FC'])
    output = tf.matmul(fc, weights['Output']) + biases['Output']
    return output
def next_batch(num, data, labels):
    idx = np.arange(0, len(data))
    np.random.shuffle(idx)
    idx = idx[:num]
    data_shuffle = [data[i] for i in idx]
    labels_shuffle = [labels[i] for i in idx]
    return np.asarray(data_shuffle), np.asarray(labels_shuffle)
def train_neural_network(x):
    prediction = neural_network_model(x)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    hm_epochs = 10
    with tf.Session() as sess:
        # OLD:
        #sess.run(tf.initialize_all_variables())
        # NEW:
        sess.run(tf.global_variables_initializer())
        for epoch in range(hm_epochs):
            epoch_loss = 0
            for _ in range(int(training_examples/batch_size)):
                epoch_x, epoch_y = next_batch(batch_size, images, labels)
                _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
                epoch_loss += c
            print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:', epoch_loss)
        correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        print('Accuracy:', accuracy.eval({x: images, y: labels}))
print('Training Neural Network...')
train_neural_network(x)
What am I doing wrong? What needs to be fixed, and how do I fix the shape of the numpy array?
If you look closely, you'll see that you have two tensors named x:
x = tf.placeholder('float', [None, 784]) # global
...
x = tf.reshape(x, shape=[-1,28,28,1]) # in neural_network_model
One of them is in the function scope, hence not visible in train_neural_network, so TensorFlow takes the one with [?, 784] shape. You should get rid of one of them.
Also note that your training data has rank 3, i.e. [batch_size, 28, 28], so it's not directly compatible with either of those placeholders.
To feed it into the first x, take epoch_x.reshape([-1, 784]). For the second placeholder (once you make it visible), take epoch_x.reshape([-1, 28, 28, 1]).
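As a minimal sketch of the first option (the shapes come from the error message; the zeros array is just a stand-in for a real batch):
import numpy as np

# a batch of 128 grayscale 28x28 images, as in the error message
epoch_x = np.zeros((128, 28, 28), dtype=np.float32)

# flatten each image to 784 values so the batch matches the [None, 784] placeholder
epoch_x_flat = epoch_x.reshape([-1, 784])
print(epoch_x_flat.shape)  # (128, 784)

# then feed the flattened batch in the training loop:
# sess.run([optimizer, cost], feed_dict={x: epoch_x_flat, y: epoch_y})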

Getting different Accuracy rates in convolutional neural networks while using tensorflow and tflearn

So I was using tflearn to make a CNN and the accuracy was good, but when I tried to train the same kind of network with the same learning rate and other parameters, for some reason I don't understand, the accuracy I got with tensorflow was lower. Is there any reason this happened?
Here is my Neural net layer:
def cnn(x):
    x = tflearn.layers.core.input_data(shape=[None, 50, 50, 3], placeholder=x)
    conv_layer1 = tflearn.layers.conv.conv_2d(x, nb_filter=32, filter_size=5, activation='relu')
    out_layer_1 = tflearn.layers.max_pool_2d(conv_layer1, 5)
    conv_layer2 = tflearn.layers.conv.conv_2d(out_layer_1, nb_filter=64, filter_size=5, activation='relu')
    out_layer_2 = tflearn.layers.max_pool_2d(conv_layer2, 5)
    conv_layer3 = tflearn.layers.conv.conv_2d(out_layer_2, nb_filter=128, filter_size=5, activation='relu')
    out_layer_3 = tflearn.layers.max_pool_2d(conv_layer3, 5)
    conv_layer4 = tflearn.layers.conv.conv_2d(out_layer_3, nb_filter=64, filter_size=5, activation='relu')
    out_layer_4 = tflearn.layers.max_pool_2d(conv_layer4, 5)
    conv_layer5 = tflearn.layers.conv.conv_2d(out_layer_4, nb_filter=32, filter_size=5, activation='relu')
    out_layer_5 = tflearn.layers.max_pool_2d(conv_layer5, 5)
    fc1 = tflearn.layers.core.fully_connected(out_layer_5, 1024, activation='relu', name="FC1")
    fc1_dropout = tflearn.layers.core.dropout(fc1, 0.5)
    output = tflearn.layers.core.fully_connected(fc1_dropout, 2, activation='softmax', name='output')
    return output
And here is my training function:
def train_model():
    x = tf.placeholder(tf.float32, shape=[None, 50, 50, 3], name="x")
    x_image = tf.reshape(x, [-1, 50, 50, 3])
    y = tf.placeholder(tf.float32, shape=[None, 2], name="y")
    y_cls = tf.argmax(y, dimension=1)
    y_pred = cnn(x_image)
    print "Importing Training Data..."
    x_train = np.load('data/CatOrDog/training_images.npy')
    y_train = np.load('data/CatOrDog/training_labels.npy')
    y_train = [[1, 0] if label == 'Dog' else [0, 1] for label in y_train]
    y_train = np.array(y_train)
    randomer = np.arange(x_train.shape[0])
    np.random.shuffle(randomer)
    x_train = x_train[randomer]
    y_train = y_train[randomer]
    n_data = len(x_train)
    x_train = np.array(x_train, dtype='float32')
    print "Images Shape: ", x_train.shape, "\t", x_train.dtype
    print "\nImporting Testing Data..."
    x_test = np.load('data/CatOrDog/testing_images.npy')
    y_test = np.load('data/CatOrDog/testing_labels.npy')
    y_test = [[1, 0] if testing_label == 'Dog' else [0, 1] for testing_label in y_test]
    y_test = np.array(y_test)
    x_test = np.array(x_test, dtype='float32')
    randomer = np.arange(x_test.shape[0])
    np.random.shuffle(randomer)
    x_test = x_test[randomer]
    y_test = y_test[randomer]
    n_data = len(x_train)
    n_test_data = len(x_test)
    '''divider = int(n_test_data / 2)
    x_test_data = x_test[0:divider]
    y_test_data = y_test[0:divider]
    x_validation_data = x_test[divider+1:n_test_data-1]
    y_validation_data = y_test[divider + 1:n_test_data - 1]'''
    with tf.variable_scope("Softmax"):
        y_pred_cls = tf.argmax(y_pred, dimension=1)
    with tf.name_scope("cross_ent"):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=y_pred, labels=y)
        cost = tf.reduce_mean(cross_entropy)
    with tf.name_scope("Optimizer"):
        optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
    with tf.name_scope("Accuracy"):
        correct_prediction = tf.equal(y_pred_cls, y_cls)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    writer = tf.summary.FileWriter("Training_FileWriter/")
    writer1 = tf.summary.FileWriter("Validation_FileWriter/")
    tf.summary.scalar('loss', cost)
    tf.summary.scalar('accuracy', accuracy)
    merged_summary = tf.summary.merge_all()
    num_epochs = 10
    batch_size = 300
    with tf.Session() as sess:
        # x = sess.graph.get_tensor_by_name('x')
        sess.run(tf.global_variables_initializer())
        writer.add_graph(sess.graph)
        for epoch in range(num_epochs):
            start_time = time.time()
            train_accuracy = 0
            cur_batch = int(n_data / batch_size)
            prev_index = 0
            bar = progressbar.ProgressBar(maxval=cur_batch)
            bar.start()
            for batch in range(0, cur_batch):
                start, end = ClassifyData.get_batch_array_indexes(previous_index=prev_index, batch_size=batch_size, n_data=n_data)
                if start == n_data:
                    break
                x_batch = x_train[start:end]
                y_true_batch = y_train[start:end]
                feed_dict_train = {x: x_batch, y: y_true_batch}
                sess.run(optimizer, feed_dict=feed_dict_train)
                train_accuracy += sess.run(accuracy, feed_dict=feed_dict_train)
                summ = sess.run(merged_summary, feed_dict=feed_dict_train)
                writer.add_summary(summ, epoch * int(n_data / batch_size) + batch)
                bar.update(batch)
            bar.finish()
            train_accuracy /= int(n_data / batch_size)
            summ, vali_accuracy = sess.run([merged_summary, accuracy],
                                           feed_dict={x: x_test, y: y_test})
            writer1.add_summary(summ, epoch)
            end_time = time.time()
            print "\nEpoch " + str(epoch + 1) + " completed : Time usage " + str(int(end_time - start_time)) + " seconds"
            print "\tAccuracy:"
            print "\t- Training Accuracy:\t{}".format(train_accuracy)
            print "\t- Validation Accuracy:\t{}".format(vali_accuracy)
PS I am using tflearn to build my network.

Tensorflow: Can't overfit training data with batch size > 1

I coded a small RNN network with Tensorflow to return the total energy consumption given some parameters. There seems to be a problem in my code: it can't overfit the training data when I use a batch size > 1 (even with only 4 samples!). In the code below, the loss value reaches 0 when I set BatchSize to 1. However, when I set BatchSize to 2, the network fails to overfit and the loss goes toward 12.500000 and gets stuck there forever.
I suspect this has something to do with LSTM states. I get the same problem if I don't update the state with each iteration. Or maybe it's the cost function? Any help is appreciated. Thanks.
import tensorflow as tf
import numpy as np
import os
from utils import loadData
Epochs = 10000
LearningRate = 0.0001
MaxGradNorm = 5
SeqLen = 1
NChannels = 28
NClasses = 1
NLayers = 2
NUnits = 256
BatchSize = 1
NumSamples = 4
#################################################################
trainingFile = "./training.dat"
X_values, Y_values = loadData(trainingFile, SeqLen, NumSamples)
X = tf.placeholder(tf.float32, [BatchSize, SeqLen, NChannels], name='inputs')
Y = tf.placeholder(tf.float32, [BatchSize, SeqLen, NClasses], name='labels')
keep_prob = tf.placeholder(tf.float32, name='keep')
initializer = tf.contrib.layers.xavier_initializer()
Xin = tf.unstack(tf.transpose(X, perm=[1, 0, 2]))
lstm_layers = []
for i in range(NLayers):
    lstm_layer = tf.nn.rnn_cell.LSTMCell(num_units=NUnits, initializer=initializer, use_peepholes=True, state_is_tuple=True)
    dropout_layer = tf.contrib.rnn.DropoutWrapper(lstm_layer, output_keep_prob=keep_prob)
    # [LSTM ---> DROPOUT] ---> [LSTM ---> DROPOUT] ---> etc...
    lstm_layers.append(dropout_layer)
rnn = tf.nn.rnn_cell.MultiRNNCell(lstm_layers, state_is_tuple=True)
initial_state = rnn.zero_state(BatchSize, tf.float32)
outputs, final_state = tf.nn.static_rnn(rnn, Xin, dtype=tf.float32, initial_state=initial_state)
outputs = tf.transpose(outputs, [1,0,2])
outputs = tf.reshape(outputs, [-1, NUnits])
weight = tf.Variable(tf.truncated_normal([NUnits, NClasses]))
bias = tf.Variable(tf.constant(0.1, shape=[NClasses]))
prediction = tf.matmul(outputs, weight) + bias
prediction = tf.reshape(prediction, [BatchSize, SeqLen, NClasses])
cost = tf.reduce_sum(tf.pow(tf.subtract(prediction, Y), 2)) / (2 * BatchSize)
tvars = tf.trainable_variables()
grad, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), MaxGradNorm)
optimizer = tf.train.AdamOptimizer(learning_rate = LearningRate)
train_step = optimizer.apply_gradients(zip(grad, tvars))
sess = tf.Session()
sess.run(tf.global_variables_initializer())
iteration = 1
for e in range(0, Epochs):
    train_loss = []
    state = sess.run(initial_state)
    for i in xrange(0, len(X_values), BatchSize):
        x = X_values[i:i + BatchSize]
        y = Y_values[i:i + BatchSize]
        y = np.expand_dims(y, 2)
        feed = {X: x, Y: y, keep_prob: 1.0, initial_state: state}
        _, loss, state, pred = sess.run([train_step, cost, final_state, prediction], feed_dict=feed)
        train_loss.append(loss)
        iteration += 1
    print("Epoch: {}/{}".format(e, Epochs), "Iteration: {:d}".format(iteration), "Train average rmse: {:6f}".format(np.mean(train_loss)))
Normalizing the input data solved the problem.
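For illustration, a minimal sketch of one common way to do this (z-score normalization per channel; the question doesn't show loadData or the exact normalization used, so this is an assumption):
import numpy as np

# X_values is assumed to have shape [NumSamples, SeqLen, NChannels]
# z-score normalization per channel, computed over all samples and time steps
mean = X_values.mean(axis=(0, 1), keepdims=True)
std = X_values.std(axis=(0, 1), keepdims=True)
X_values = (X_values - mean) / (std + 1e-8)  # epsilon avoids division by zero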

tf.layers.batch_normalization freezes during sess.run() (1.5.0-dev20171031)

The graph-building phase passes without error, but the program freezes (no hard-drive reads, no memory change, no ...) during sess.run() on the first mini-batch of the first epoch. If I remove this layer or replace it with tf.contrib.layers.layer_norm, the program runs without issues.
The tensor (x) I pass into tf.layers.batch_normalization has the shape [#batches, 200]. I use mostly default values, but turned off center and scale.
x_BN = tf.layers.batch_normalization(
    x,
    axis=-1,
    momentum=0.99,
    epsilon=1e-10, #0.001,
    center=False, #True,
    scale=False, #True,
    beta_initializer=tf.zeros_initializer(),
    gamma_initializer=tf.ones_initializer(),
    moving_mean_initializer=tf.zeros_initializer(),
    moving_variance_initializer=tf.ones_initializer(),
    beta_regularizer=None,
    gamma_regularizer=None,
    beta_constraint=None,
    gamma_constraint=None,
    training=Flg_training, #False,
    trainable=True,
    name=None,
    reuse=None,
    renorm=False,
    renorm_clipping=None,
    renorm_momentum=0.99,
    fused=False,
    virtual_batch_size=None,
    adjustment=None
)
The tensorflow version I'm using is tf-nightly-gpu (1.5.0-dev20171031 or 1.5.0-dev20171023). Has anyone encountered a similar problem?
Update
This happens when the input of tf.layers.batch_normalization comes from tf.nn.bidirectional_dynamic_rnn; see the simplified code below, which reproduces the issue:
import tensorflow as tf
import numpy as np
starter_learning_rate = 0.001
decay_steps = 100
decay_rate = 0.96
num_RNN_layers = 3
LSTM_CELL_SIZE = 100
keep_prob = 0.95
with tf.name_scope('Inputs'):
    x = tf.placeholder(dtype=tf.float32, shape=[None, 200])
    y = tf.placeholder(dtype=tf.float32, shape=[None, 200])
    length = tf.placeholder(dtype=tf.int32, shape=[None])
    Flg_training = tf.placeholder(dtype=tf.bool, shape=[])
x_1 = tf.expand_dims(x, -1)
with tf.name_scope('BiLSTM'):
    dropcells = []
    for iiLyr in list(range(num_RNN_layers)):
        cell_iiLyr = tf.nn.rnn_cell.LSTMCell(num_units=LSTM_CELL_SIZE, state_is_tuple=True)
        dropcells.append(tf.nn.rnn_cell.DropoutWrapper(cell=cell_iiLyr, output_keep_prob=keep_prob))  #, input_keep_prob=self.keep_prob input_keep_prob=1.0, seed=None
    MultiLyr_cell = tf.nn.rnn_cell.MultiRNNCell(cells=dropcells, state_is_tuple=True)
    outputs, states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=MultiLyr_cell,
        cell_bw=MultiLyr_cell,
        dtype=tf.float32,
        sequence_length=length, #tf_b_lens
        inputs=x_1, #stacked_RefPts_desc, #tf_b_VCCs_AMs_BN1
        scope="BiLSTM"
    )
    #output_fw, output_bw = outputs
    states_fw, states_bw = states
    c_fw_lstLyr, h_fw_lstLyr = states_fw[-1]
    c_bw_lstLyr, h_bw_lstLyr = states_bw[-1]
    states_concat1 = tf.concat([h_fw_lstLyr, h_bw_lstLyr], axis=1, name='states_concat')
with tf.name_scope("cs_BN1"):
x_BN = tf.layers.batch_normalization(
states_concat1,
axis=-1, # axis that should be normalized (typically the features axis, in this case the concated states or hidden vectors)
momentum=0.99,
epsilon=1e-10, #0.001,
center=False, #True,
scale=False, #True,
beta_initializer=tf.zeros_initializer(),
gamma_initializer=tf.ones_initializer(),
moving_mean_initializer=tf.zeros_initializer(),
moving_variance_initializer=tf.ones_initializer(),
beta_regularizer=None,
gamma_regularizer=None,
beta_constraint=None,
gamma_constraint=None,
training=Flg_training, #False,
trainable=True,
name="test_BN", #None,
reuse=None,
renorm=False,
renorm_clipping=None,
renorm_momentum=0.99,
fused=False,
virtual_batch_size=None,
adjustment=None
)
with tf.name_scope("Regression"):
a = tf.get_variable("a", shape=[1], dtype=tf.float32, initializer=tf.constant_initializer(1.0))
b = tf.get_variable("b", shape=[1], dtype=tf.float32, initializer=tf.constant_initializer(0.0))
with tf.name_scope("Prediction"):
y_pred = tf.multiply(x_BN, a) + b
with tf.name_scope('Loss'):
losses = tf.losses.mean_squared_error(y, y_pred, reduction=tf.losses.Reduction.NONE)
mean_loss = tf.reduce_mean(losses)
with tf.name_scope('Training'):
global_step = tf.Variable(0, trainable=False)
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
decay_steps, decay_rate, staircase=True)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
train_step = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(losses, global_step=global_step)
#x_mean = tf.reduce_mean(x_BN, axis=0)
sess = tf.InteractiveSession()
train_writer = tf.summary.FileWriter("G:\\Surface_Ozone\\Temp\\", sess.graph)
sess.run(tf.global_variables_initializer())
for ii in list(range(2000)):
    x_in = (np.random.rand(20, 200))
    y_in = x_in * 1.5 + 3.0
    length_in = np.full([20], 200, dtype=np.int32)
    _, mean_loss_val, a_val, b_val = sess.run([train_step, mean_loss, a, b], feed_dict={
        x: x_in,
        Flg_training: True,
        y: y_in,
        length: length_in
    })
    if (ii < 50):
        print("step {}: {} | a: {} | b: {}".format(ii, mean_loss_val, a_val, b_val))
    else:
        if (ii % 100 == 0):
            print("step {}: {} | a: {} | b: {}".format(ii, mean_loss_val, a_val, b_val))
print("Normal End.")