ValueError: Cannot feed value of shape (128, 28, 28) for Tensor 'Placeholder:0', which has shape '(?, 784)' - numpy

I am new to Tensorflow and Machine Learning and trying out CNN using Tensorflow with my custom input data. But I am getting the error attached below.
The Data or Image Size is 28x28 with 15 Labels.
I am not getting the numpy reshape thing in this script or the error.
Help is highly appreciated.
import tensorflow as tf
import os
import skimage.data
import numpy as np
import random
def load_data(data_directory):
directories = [d for d in os.listdir(data_directory)
if os.path.isdir(os.path.join(data_directory, d))]
labels = []
images = []
for d in directories:
label_directory = os.path.join(data_directory, d)
file_names = [os.path.join(label_directory, f)
for f in os.listdir(label_directory)
if f.endswith(".jpg")]
for f in file_names:
images.append(skimage.data.imread(f))
labels.append(d)
print(str(d)+' Completed')
return images, labels
ROOT_PATH = "H:\Testing\TrainingData"
train_data_directory = os.path.join(ROOT_PATH, "Training")
test_data_directory = os.path.join(ROOT_PATH, "Testing")
print('Loading Data...')
images, labels = load_data(train_data_directory)
print('Data has been Loaded')
n_classes = 15
training_examples = 10500
test_examples = 4500
batch_size = 128
x = tf.placeholder('float', [None, 784])
y = tf.placeholder('float')
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME')
def maxpool2d(x):
return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
def neural_network_model(x):
weights = {'W_Conv1':tf.Variable(tf.random_normal([5,5,1,32])),
'W_Conv2':tf.Variable(tf.random_normal([5,5,32,64])),
'W_FC':tf.Variable(tf.random_normal([7*7*64, 1024])),
'Output':tf.Variable(tf.random_normal([1024, n_classes]))}
biases = {'B_Conv1':tf.Variable(tf.random_normal([32])),
'B_Conv2':tf.Variable(tf.random_normal([64])),
'B_FC':tf.Variable(tf.random_normal([1024])),
'Output':tf.Variable(tf.random_normal([n_classes]))}
x = tf.reshape(x, shape=[-1,28,28,1])
conv1 = conv2d(x, weights['W_Conv1'])
conv1 = maxpool2d(conv1)
conv2 = conv2d(conv1, weights['W_Conv2'])
conv2 = maxpool2d(conv2)
fc = tf.reshape(conv2, [-1, 7*7*64])
fc = tf.nn.relu(tf.matmul(fc, weights['W_FC'])+biases['B_FC'])
output = tf.matmul(fc, weights['Output'])+biases['Output']
return output
def next_batch(num, data, labels):
idx = np.arange(0 , len(data))
np.random.shuffle(idx)
idx = idx[:num]
data_shuffle = [data[ i] for i in idx]
labels_shuffle = [labels[ i] for i in idx]
return np.asarray(data_shuffle), np.asarray(labels_shuffle)
def train_neural_network(x):
prediction = neural_network_model(x)
cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y) )
optimizer = tf.train.AdamOptimizer().minimize(cost)
hm_epochs = 10
with tf.Session() as sess:
# OLD:
#sess.run(tf.initialize_all_variables())
# NEW:
sess.run(tf.global_variables_initializer())
for epoch in range(hm_epochs):
epoch_loss = 0
for _ in range(int(training_examples/batch_size)):
epoch_x, epoch_y = next_batch(batch_size, images, labels)
_, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
epoch_loss += c
print('Epoch', epoch, 'completed out of',hm_epochs,'loss:',epoch_loss)
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
print('Accuracy:',accuracy.eval({x: images, y: labels}))
print('Training Neural Network...')
train_neural_network(x)
What am I doing wrong? What is needed to be fixed and how do I fix the shape of numpy array?

If you look closely, you'll see that you have two x placeholders:
x = tf.placeholder('float', [None, 784]) # global
...
x = tf.reshape(x, shape=[-1,28,28,1]) # in neural_network_model
One of them is in the function scope, hence not visible in train_neural_network, so tensorflow takes the one with [?, 784] shape. You should get rid of one of them.
Also note that your training data has the rank 3, i.e. [batch_size, 28, 28], so it's not directly compatible with any of those placeholders.
To feed it into the first x, take epoch_x.reshape([-1, 784]). For the second placeholder (once you make it visible), take epoch_x.reshape([-1, 28, 28, 1]).

Related

Recurrent neural network, time series prediction with newer Tensorflow 1.14

How to use new tf.keras API with recurrent neural network? I have checked the documentation but there is no example of such a situation.
There is this great book Hands on machine learning from 2017. Since that year the API of tensorflow has evolved and I am trying to rewrite recurrent neural network for time series prediction with using version 1.14 code.
The code from the book is using older tf.nn.dynamic_rnn and tf.nn.rnn_cell.BasicRNNCell:
n_steps = 20
n_inputs = 1
n_neurons = 100
n_outputs = 1
learning_rate = 0.001
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])
cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)
rnn_outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])
stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)
outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])
loss = tf.reduce_mean(tf.square(outputs - y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_iterations = 500
batch_size = 50
with tf.Session() as sess:
init.run()
for iteration in range(n_iterations):
X_batch, y_batch = next_batch(batch_size, n_steps)
sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
if iteration % 100 == 0:
mse = loss.eval(feed_dict={X: X_batch, y: y_batch})
print(iteration, "\tMSE:", mse)
X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))
y_pred = sess.run(outputs, feed_dict={X: X_new})
And this code works just fine (except that it throws warnings about deprecation left and right). I wanted to use tf.keras API as suggested in warning. My code is the same except:
cell = tf.keras.layers.SimpleRNNCell(units=n_neurons, activation=tf.nn.relu)
rnn_outputs = tf.keras.layers.RNN(cell,dtype=tf.float32, name="hidden1")(X)
But this yields following exception:
InvalidArgumentError: Input to reshape is a tensor with 50 values, but the requested shape requires a multiple of 20
[[node Reshape_1 (defined at <ipython-input-9-879361be49dd>:3) ]]
so I understand that the problematic line is
outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])
After checking and comparing documentation for both cells https://www.tensorflow.org/api_docs/python/tf/nn/dynamic_rnn and
https://www.tensorflow.org/api_docs/python/tf/keras/layers/RNN I can't find the culprit.
What is the difference with these two cells? How to use tf.keras API with time series?
Full old code: https://github.com/ageron/handson-ml/blob/master/14_recurrent_neural_networks.ipynb
Full "my" code:
import numpy as np
import tensorflow as tf
from datetime import datetime
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import pandas as pd
from utils import shuffle_batch, variable_summaries
import os
dir_path = os.getcwd()
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)
print(dir_path)
t_min, t_max = -5, 5
section_start = (t_max + t_min) / 2
resolution = 0.1
n_steps = 20
def time_series(t):
return np.sin(t)
def next_batch(batch_size, n_steps):
t0 = np.random.rand(batch_size, 1) * (t_max - t_min - n_steps * resolution)
Ts = t0 + np.arange(0., n_steps + 1) * resolution
ys = time_series(Ts)
return ys[:, :-1].reshape(-1, n_steps, 1), ys[:, 1:].reshape(-1, n_steps, 1)
t = np.linspace(t_min, t_max, int((t_max - t_min) / resolution))
t_instance = np.linspace(start = section_start, stop = section_start + resolution * (n_steps + 1),num = n_steps + 1)
plt.figure(figsize=(11,4))
plt.subplot(121)
plt.title("A time series (generated)", fontsize=14)
plt.plot(t, time_series(t), label=r"original")
plt.plot(t_instance[:-1], time_series(t_instance[:-1]), "b-", linewidth=3, label="A training instance")
plt.legend(loc="lower left", fontsize=14)
#plt.axis([-10, 10, -17, 13])
plt.xlabel("Time")
plt.ylabel("Value")
plt.subplot(122)
plt.title("A training instance", fontsize=14)
plt.plot(t_instance[:-1], time_series(t_instance[:-1]), "bo", markersize=10, label="instance")
plt.plot(t_instance[1:], time_series(t_instance[1:]), "c*", markersize=10, label="target")
plt.legend(loc="upper left")
plt.xlabel("Time")
# In[6]:
n_steps = 20
n_inputs = 1
n_neurons = 100
n_outputs = 1
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])
# In[7]:
cell = tf.keras.layers.SimpleRNNCell(units=n_neurons, activation=tf.nn.relu)
rnn_outputs = tf.keras.layers.RNN(cell,dtype=tf.float32, name="hidden1")(X)
print(rnn_outputs.get_shape())
stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons], name='reshape1')
stacked_outputs = tf.keras.layers.Dense(n_outputs,name="hidden2")(stacked_rnn_outputs)
outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs], name='reshape2')
learning_rate = 0.001
loss = tf.reduce_mean(tf.square(outputs - y)) # MSE
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_iterations = 1500
batch_size = 50
save_path =os.path.join(dir_path,"model","recurrent_sinus_model")
with tf.Session() as sess:
init.run()
for iteration in range(n_iterations):
X_batch, y_batch = next_batch(batch_size, n_steps)
sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
if iteration % 100 == 0:
mse = loss.eval(feed_dict={X: X_batch, y: y_batch})
print(iteration, "\tMSE:", mse)
saver.save(sess, save_path)
with tf.Session() as sess:
saver.restore(sess, save_path)
X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))
y_pred = sess.run(outputs, feed_dict={X: X_new})
plt.title("Testing the model", fontsize=14)
plt.plot(t_instance[:-1], time_series(t_instance[:-1]), "bo", markersize=10, label="instance")
plt.plot(t_instance[1:], time_series(t_instance[1:]), "w*", markersize=10, label="target")
plt.plot(t_instance[1:], y_pred[0,:,0], "r.", markersize=10, label="prediction")
plt.legend(loc="upper left")
plt.xlabel("Time")
plt.show()
# In[ ]:
with tf.Session() as sess:
saver.restore(sess, save_path)
X_new = time_series(np.array(t.reshape(-1, n_steps, n_inputs)))
y_pred = sess.run(outputs, feed_dict={X: X_new})
plt.title("A time series (generated)", fontsize=14)
plt.plot(t, time_series(t), label=r"original",linewidth=5,c='r')
plt.plot(t[:-1], time_series(t[:-1]), "b-", linewidth=3, label="A training instance")
plt.legend(loc="lower left", fontsize=14)
plt.xlabel("Time")
plt.ylabel("Value")
So the answer is:
rnn_outputs, rnn_states = tf.keras.layers.RNN(cell,dtype=tf.float32, name="hidden1", return_state=True, return_sequences=True)(X)
instead of
rnn_outputs = tf.keras.layers.RNN(cell,dtype=tf.float32, name="hidden1")(X)
so the parameter return_sequences=True make the RNN return the time series as well, and well, this is the point.

How to print out prediction value in tensorflow

I am new to tensorflow and I am a slow learner. After successfully compiling the model and get the accuracy I want to print the prediction variable but I dont know how to do it.
My dataset is multivariate feature with only one output. The output contains only 1, 0 ,-1 so I made one hot encoder for the output. I finished compiling the model and looking for computing prediction on tensorflow online, however I didnt find a good solution base on my question.
The precisionCalculate function is to compute precision on each column on test data since the trian_y and test_y after one hot encode becomes [1,0,0],[0,1,0],[0,0,1].
I have tried
y_pred = sess.run(tf.argmax(y, 1), feed_dict={X: test_x, y: test_y})
but it turns out y_pred is exactly the same as my test_y
Here is my full code example.
import tensorflow as tf
import pandas as pd
import numpy as np
import tensorflow.contrib.rnn
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, LabelEncoder
import pdb
np.set_printoptions(threshold=np.inf)
def precisionCalculate(pred_y, test_y):
count = pred_y + test_y
firstZero = len(count[count==0])
countFour = len(count[count == 4])
precision1 = firstZero / len(pred_y[pred_y==0] )
precision3 = countFour / len(pred_y[pred_y==2])
pdb.set_trace()
return precision1, precision3
df = pd.read_csv('new_df.csv', skiprows=[0], header=None)
df.drop(columns=[0,1], inplace=True)
df.columns = [np.arange(0, df.shape[1])]
df[0] = df[0].shift(-1)
#parameters
time_steps = 1
inputs = df.shape[1]
outputs = 3
#remove nan as a result of shift values
df = df.iloc[:-1, :]
#convert to numpy
df = df.values
train_number = 30276 #start date from 1018
train_x = df[: train_number, 1:]
test_x = df[train_number:, 1:]
train_y = df[:train_number, 0]
test_y = df[train_number:, 0]
#data pre-processing
#x y split
#scale
scaler = MinMaxScaler(feature_range=(0,1))
train_x = scaler.fit_transform(train_x)
test_x = scaler.fit_transform(test_x)
#reshape into 3d array
train_x = train_x[:, None, :]
test_x = test_x[:, None, :]
#one-hot encode the outputs
onehot_encoder = OneHotEncoder()
#encoder = LabelEncoder()
max_ = train_y.max()
max2 = test_y.max()
train_y = (train_y - max_) * (-1)
test_y = (test_y - max2) * (-1)
encode_categorical = train_y.reshape(len(train_y), 1)
encode_categorical2 = test_y.reshape(len(test_y), 1)
train_y = onehot_encoder.fit_transform(encode_categorical).toarray()
test_y = onehot_encoder.fit_transform(encode_categorical2).toarray()
print(train_x.shape, train_y.shape, test_x.shape, test_y.shape)
#model parameters
learning_rate = 0.001
epochs = 100
batch_size = int(train_x.shape[0]/10)
length = train_x.shape[0]
display = 100
neurons = 100
tf.reset_default_graph()
X = tf.placeholder(tf.float32, [None, time_steps, 90],name='x')
y = tf.placeholder(tf.float32, [None, outputs],name='y')
#LSTM cell
cell = tf.contrib.rnn.BasicLSTMCell(num_units = neurons, activation = tf.nn.relu)
cell_outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
# pass into Dense layer
stacked_outputs = tf.reshape(cell_outputs, [-1, neurons])
out = tf.layers.dense(inputs=stacked_outputs, units=outputs)
# squared error loss or cost function for linear regression
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=out, labels=y))
# optimizer to minimize cost
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
accuracy = tf.metrics.accuracy(labels = tf.argmax(y, 1), predictions = tf.argmax(out, 1), name = "accuracy")
precision = tf.metrics.precision(labels=tf.argmax(y, 1), predictions=tf.argmax(out, 1), name="precision")
recall = tf.metrics.recall(labels=tf.argmax(y, 1), predictions=tf.argmax(out, 1),name="recall")
f1 = 2 * accuracy[1] * recall[1] / ( precision[1] + recall[1] )
with tf.Session() as sess:
# initialize all variables
tf.global_variables_initializer().run()
tf.local_variables_initializer().run()
# Train the model
for steps in range(epochs):
mini_batch = zip(range(0, length, batch_size), range(batch_size, length+1, batch_size))
epoch_loss = 0
i = 0
# train data in mini-batches
for (start, end) in mini_batch:
sess.run(training_op, feed_dict = {X: train_x[start:end,:,:], y: train_y[start:end,:]})
# print training performance
if (steps+1) % display == 0:
# evaluate loss function on training set
loss_fn = loss.eval(feed_dict = {X: train_x, y: train_y})
print('Step: {} \tTraining loss: {}'.format((steps+1), loss_fn))
# evaluate model accuracy
acc, prec, recall, f1 = sess.run([accuracy, precision, recall, f1],feed_dict = {X: test_x, y: test_y})
y_pred = sess.run(tf.argmax(y, 1), feed_dict={X: train_x, y: train_y})
test_y_alter = np.argmax(test_y, axis=1)
#print(test_y_alter)
print(precisionCalculate(y_pred, test_y_alter))
print(y_pred)
#prediction = y_pred.eval(feed_dict={X: train_x, y: test_y})
#print(prediction)
print('\nEvaluation on test set')
print('Accuracy:', acc[1])
print('Precision:', prec[1])
print('Recall:', recall[1])
print('F1 score:', f1)
I think you should use the output of your model instead of the label (y) in tf.argmax.
Here is my code in order to print prediction of the model:
pred_y = tf.Print(tf.argmax(score, 1), [tf.argmax(score, 1)], message="prediction:)
pred_y.eval()
In the above code, score means the probability output of your model.

tensor flow error: logits and labels must be broadcastable

I am having the following error displayed while trying to get tensorflow running:
InvalidArgumentError: logits and labels must be broadcastable: logits_size=[30,2] labels_size=[8,2]
Below is my code. I obtained parts of the 1st part of the code from https://blog.francium.tech/build-your-own-image-classifier-with-tensorflow-and-keras-dc147a15e38e and the second from https://www.datacamp.com/community/tutorials/cnn-tensorflow-python. I adopted them to something I am working on where I have some images that belong to 2 different classes. For training, each image class are placed in the same training folder and for testing, each image class is placed in the same testing folder. I figure the error is referring to a mismatch between the logits and label. I have tried tweaking the shapes in the weights and biases as defined in the code below, but this didn't solve the issue. I also tried tampering with the batch size, still no solution. Does anyone have any idea what could cause this error? Could it be how I arranged my training and testing set?
ROOT_PATH = "/my/file/path/images"
train_data_directory = os.path.join(ROOT_PATH, "data/train")
test_data_directory = os.path.join(ROOT_PATH, "data/test")
train_data = train_data_directory
test_data = test_data_directory
def one_hot_label(img):
label = img.split('.')[0]
global ohl
ohl = []
if label == 'A':
ohl = np.array([1,0])
elif label == 'B':
ohl = np.array([0,1])
return ohl
def train_data_with_label():
train_images = []
for i in tqdm(os.listdir(train_data)):
path = os.path.join(train_data,i)
img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
img = cv2.resize(img, (28,28))
train_images.append([np.array(img), one_hot_label(i)])
shuffle(train_images)
return train_images
def test_data_with_label():
test_images = []
for i in tqdm(os.listdir(test_data)):
path = os.path.join(test_data,i)
img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
img = cv2.resize(img, (28,28))
test_images.append([np.array(img), one_hot_label(i)])
shuffle(test_images)
return test_images
training_images = train_data_with_label()
testing_images = test_data_with_label()
#both placeholders are of type float
x = tf.placeholder("float", [None, 28,28,1])
y = tf.placeholder("float", [None, n_classes])
def conv2d(x, W, b, strides=1):
# Conv2D wrapper, with bias and relu activation
x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
x = tf.nn.bias_add(x, b)
return tf.nn.relu(x)
def maxpool2d(x, k=2):
return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1],padding='SAME')
weights = {
'wc1': tf.get_variable('W0', shape=(3,3,1,32), initializer=tf.contrib.layers.xavier_initializer()),
'wc2': tf.get_variable('W1', shape=(3,3,32,64), initializer=tf.contrib.layers.xavier_initializer()),
'wc3': tf.get_variable('W2', shape=(3,3,64,128), initializer=tf.contrib.layers.xavier_initializer()),
'wd1': tf.get_variable('W3', shape=(4*4*128,128), initializer=tf.contrib.layers.xavier_initializer()),
'out': tf.get_variable('W6', shape=(128,n_classes), initializer=tf.contrib.layers.xavier_initializer()),
}
biases = {
'bc1': tf.get_variable('B0', shape=(32), initializer=tf.contrib.layers.xavier_initializer()),
'bc2': tf.get_variable('B1', shape=(64), initializer=tf.contrib.layers.xavier_initializer()),
'bc3': tf.get_variable('B2', shape=(128), initializer=tf.contrib.layers.xavier_initializer()),
'bd1': tf.get_variable('B3', shape=(128), initializer=tf.contrib.layers.xavier_initializer()),
'out': tf.get_variable('B4', shape=(2), initializer=tf.contrib.layers.xavier_initializer()),
}
def conv_net(x, weights, biases):
# here we call the conv2d function we had defined above and pass the input image x, weights wc1 and bias bc1.
conv1 = conv2d(x, weights['wc1'], biases['bc1'])
# Max Pooling (down-sampling), this chooses the max value from a 2*2 matrix window and outputs a 14*14 matrix.
conv1 = maxpool2d(conv1, k=2)
# Convolution Layer
# here we call the conv2d function we had defined above and pass the input image x, weights wc2 and bias bc2.
conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
# Max Pooling (down-sampling), this chooses the max value from a 2*2 matrix window and outputs a 7*7 matrix.
conv2 = maxpool2d(conv2, k=2)
conv3 = conv2d(conv2, weights['wc3'], biases['bc3'])
# Max Pooling (down-sampling), this chooses the max value from a 2*2 matrix window and outputs a 4*4.
conv3 = maxpool2d(conv3, k=2)
#print(conv3.shape)
# Fully connected layer
# Reshape conv2 output to fit fully connected layer input
fc1 = tf.reshape(conv3, [-1, weights['wd1'].get_shape().as_list()[0]])
fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
fc1 = tf.nn.relu(fc1)
# Output, class prediction
# finally we multiply the fully connected layer with the weights and add a bias term.
out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
print(out.shape)
return out
#print(out.shape)
pred = conv_net(x, weights, biases)
#pred.shape
#labelsa = tf.constant(1., shape=y.shape)
#logsa = tf.constant(1., shape=pred.shape)
#labels = labels + tf.zeros_like(logsa)
print(pred)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=y))
print(y)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
with tf.Session() as sess:
sess.run(init)
train_loss = []
test_loss = []
train_accuracy = []
test_accuracy = []
summary_writer = tf.summary.FileWriter('./Output', sess.graph)
for i in range(training_iters):
#print('here')
for batch in range(len(train_X)//batch_size):
print('here')
#offset = (batch * batch_size) % (train_Y.shape[0] - batch_size)
batch_x = train_X[batch*batch_size:min((batch+1)*batch_size,len(train_X))]
batch_y = train_Y[batch*batch_size:min((batch+1)*batch_size,len(train_Y))]
# Run optimization op (backprop).
# Calculate batch loss and accuracy
print(batch_y.shape)
opt = sess.run(optimizer, feed_dict={x: batch_x,
y: batch_y})
loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x,
y: batch_y})
print("Iter " + str(i) + ", Loss= " + \
"{:.6f}".format(loss) + ", Training Accuracy= " + \
"{:.5f}".format(acc))
print("Optimization Finished!")
# Calculate accuracy for all 10000 mnist test images
test_acc,valid_loss = sess.run([accuracy,cost], feed_dict={x: test_X,y: test_Y})
train_loss.append(loss)
test_loss.append(valid_loss)
train_accuracy.append(acc)
test_accuracy.append(test_acc)
print("Testing Accuracy:","{:.5f}".format(test_acc))
summary_writer.close()

Carrying gradients from previous time steps to current time steps with GRU in Tensorflow

I have the following model in tensorflow:
def output_layer(input_layer, num_labels):
'''
:param input_layer: 2D tensor
:param num_labels: int. How many output labels in total? (10 for cifar10 and 100 for cifar100)
:return: output layer Y = WX + B
'''
input_dim = input_layer.get_shape().as_list()[-1]
fc_w = create_variables(name='fc_weights', shape=[input_dim, num_labels],
initializer=tf.uniform_unit_scaling_initializer(factor=1.0))
fc_b = create_variables(name='fc_bias', shape=[num_labels], initializer=tf.zeros_initializer())
fc_h = tf.matmul(input_layer, fc_w) + fc_b
return fc_h
def model(input_features):
with tf.variable_scope("GRU"):
cell1 = tf.nn.rnn_cell.GRUCell(gru1_cell_size)
cell2 = tf.nn.rnn_cell.GRUCell(gru2_cell_size)
mcell = tf.nn.rnn_cell.MultiRNNCell([cell1, cell2], state_is_tuple=False)
# shape=(?, 64 + 32)
initial_state = tf.placeholder(shape=[None, gru1_cell_size + gru2_cell_size], dtype=tf.float32, name="initial_state")
output, new_state = tf.nn.dynamic_rnn(mcell, input_features, dtype=tf.float32, initial_state=initial_state)
with tf.variable_scope("output_reshaped"):
# before, shape: (34, 1768, 32), after, shape: (34 * 1768, 32)
output = tf.reshape(output, shape=[-1, gru2_cell_size])
with tf.variable_scope("output_layer"):
# shape: (34 * 1768, 3)
predictions = output_layer(output, num_labels)
predictions = tf.reshape(predictions, shape=[-1, 100, 3])
return predictions, initial_state, new_state, output
So as we can see from the code that the cell size of the first GRU is 64, the cell size of the second GRU is 32. And the batch size is 34 (but this is not important for me now). And the size of input features is 200. I have tried computing the gradients of the loss with respect to the trainable variables through:
local_grads_and_vars = optimizer.compute_gradients(loss, tf.trainable_variables())
# only the gradients are taken to add them later with the back propagated gradients from previous batch.
local_grads = [grad for grad, var in local_grads_and_vars]
for v in local_grads:
print("v", v)
After printing out the grads I got the following:
v Tensor("Optimizer/gradients/GRU_Layer1/rnn/while/gru_cell/MatMul/Enter_grad/b_acc_3:0", shape=(264, 128), dtype=float32)
v Tensor("Optimizer/gradients/GRU_Layer1/rnn/while/gru_cell/BiasAdd/Enter_grad/b_acc_3:0", shape=(128,), dtype=float32)
v Tensor("Optimizer/gradients/GRU_Layer1/rnn/while/gru_cell/MatMul_1/Enter_grad/b_acc_3:0", shape=(264, 64), dtype=float32)
v Tensor("Optimizer/gradients/GRU_Layer1/rnn/while/gru_cell/BiasAdd_1/Enter_grad/b_acc_3:0", shape=(64,), dtype=float32)
v Tensor("Optimizer/gradients/GRU_Layer2/rnn/while/gru_cell/MatMul/Enter_grad/b_acc_3:0", shape=(96, 64), dtype=float32)
v Tensor("Optimizer/gradients/GRU_Layer2/rnn/while/gru_cell/BiasAdd/Enter_grad/b_acc_3:0", shape=(64,), dtype=float32)
v Tensor("Optimizer/gradients/GRU_Layer2/rnn/while/gru_cell/MatMul_1/Enter_grad/b_acc_3:0", shape=(96, 32), dtype=float32)
v Tensor("Optimizer/gradients/GRU_Layer2/rnn/while/gru_cell/BiasAdd_1/Enter_grad/b_acc_3:0", shape=(32,), dtype=float32)
v Tensor("Optimizer/gradients/output_layer/MatMul_grad/tuple/control_dependency_1:0", shape=(32, 3), dtype=float32)
v Tensor("Optimizer/gradients/output_layer/add_grad/tuple/control_dependency_1:0", shape=(3,), dtype=float32)
Assume that I saved the gradients after training the model on the first batch, that is, after feeding a tensor of shape: (34, 100, 200) as input_features "In the model function argument", and output of shape (34 * 100, 3), how to back propagate these gradients on the second mini-batch?
From the documentation of tf.gradients
grad_ys is a list of tensors of the same length as ys that holds the initial gradients for each y in ys. When grad_ys is None, we fill in a tensor of '1's of the shape of y for each y in ys. A user can provide their own initial grad_ys to compute the derivatives using a different initial gradient for each y (e.g., if one wanted to weight the gradient differently for each value in each y).
So your grad_ys should be a list with the same length as the input ys.
Copying your code I was able to get the following to run:
prev_grad_pl = [tf.placeholder(tf.float32, [batch, i]) for i in [64, 32]]
prev_grad_init = {l: np.ones(l.get_shape().as_list()) for l in prev_grad_pl}
prev_grads_val__ = tf.gradients([new_state1, new_state2], [initial_state1, initial_state2], grad_ys=prev_grad_pl)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
feed = {initial_state1: np.zeros([batch, gru1_cell_size]),
initial_state2: np.zeros([batch, gru2_cell_size])}
for k in prev_grad_init:
feed[k] = prev_grad_init[k]
grad1, grad2 = sess.run(prev_grads_val__, feed_dict=feed)
Here is the solution with a custom code:
import tensorflow as tf
import numpy as np
cell_size = 32
seq_length = 1000
time_steps1 = 500
time_steps2 = seq_length - time_steps1
x_t = np.arange(1, seq_length + 1)
x_t_plus_1 = np.arange(2, seq_length + 2)
tf.set_random_seed(123)
m_dtype = tf.float32
input_1 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps1, 1], name="input_1")
input_2 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps2, 1], name="input_2")
labels1 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps1, 1], name="labels_1")
labels2 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps2, 1], name="labels_2")
labels = tf.concat([labels1, labels2], axis=1, name="labels")
def model(input_feat1, input_feat2):
with tf.variable_scope("GRU"):
cell1 = tf.nn.rnn_cell.GRUCell(cell_size)
cell2 = tf.nn.rnn_cell.GRUCell(cell_size)
initial_state = tf.placeholder(shape=[None, cell_size], dtype=m_dtype, name="initial_state")
with tf.variable_scope("First50"):
# output1: shape=[1, time_steps1, 32]
output1, new_state1 = tf.nn.dynamic_rnn(cell1, input_feat1, dtype=m_dtype, initial_state=initial_state)
with tf.variable_scope("Second50"):
# output2: shape=[1, time_steps2, 32]
output2, new_state2 = tf.nn.dynamic_rnn(cell2, input_feat2, dtype=m_dtype, initial_state=new_state1)
with tf.variable_scope("output"):
# output shape: [1, time_steps1 + time_steps2, 32] => [1, 100, 32]
output = tf.concat([output1, output2], axis=1)
output = tf.reshape(output, shape=[-1, cell_size])
output = tf.layers.dense(output, units=1)
output = tf.reshape(output, shape=[1, time_steps1 + time_steps2, 1])
with tf.variable_scope("outputs_1_2_reshaped"):
output1 = tf.slice(input_=output, begin=[0, 0, 0], size=[-1, time_steps1, -1])
output2 = tf.slice(input_=output, begin=[0, time_steps1, 0], size=[-1, time_steps2, 1])
print(output.get_shape().as_list(), "1")
print(output1.get_shape().as_list(), "2")
print(output2.get_shape().as_list(), "3")
return output, output1, output2, initial_state, new_state1, new_state2
def loss(output, output1, output2, labels, labels1, labels2):
loss = tf.reduce_sum(tf.sqrt(tf.square(output - labels)))
loss1 = tf.reduce_sum(tf.sqrt(tf.square(output1 - labels1)))
loss2 = tf.reduce_sum(tf.sqrt(tf.square(output2 - labels2)))
return loss, loss1, loss2
def optimize(loss, loss1, loss2, initial_state, new_state1, new_state2):
with tf.name_scope('Optimizer'):
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
grads1 = tf.gradients(loss2, new_state1)
grads2 = tf.gradients(loss1, initial_state)
grads3 = tf.gradients(new_state1, initial_state, grad_ys=grads1)
grads_wrt_initial_state_1 = tf.add(grads2, grads3)
grads_wrt_initial_state_2 = tf.gradients(loss, initial_state, grad_ys=None)
return grads_wrt_initial_state_1, grads_wrt_initial_state_2
output, output1, output2, initial_state, new_state1, new_state2 = model(input_1, input_2)
loss, loss1, loss2 = loss(output, output1, output2, labels, labels1, labels2)
grads_wrt_initial_state_1, grads_wrt_initial_state_2 = optimize(loss, loss1, loss2, initial_state, new_state1, new_state2)
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
in1 = np.reshape(x_t[:time_steps1], newshape=(1, time_steps1, 1))
in2 = np.reshape(x_t[time_steps1:], newshape=(1, time_steps2, 1))
l1 = np.reshape(x_t_plus_1[:time_steps1], newshape=(1, time_steps1, 1))
l2 = np.reshape(x_t_plus_1[time_steps1:], newshape=(1, time_steps2, 1))
i_s = np.zeros([1, cell_size])
t1, t2 = sess.run([grads_wrt_initial_state_1, grads_wrt_initial_state_2], feed_dict={input_1: in1,
input_2: in2,
labels1: l1,
labels2: l2,
initial_state: i_s})
print(np.mean(t1), np.mean(t2))
print(np.sum(t1), np.sum(t2))
This is an example of 2 GRUs one after the other, and I did back propagation in 2 different ways according to the code in optimize()

Optimize input image with class prior

I'm trying to implement the first part of the google blog entry
Inceptionism: Going Deeper into Neural Networks in TensorFlow. So far I have found several resources that either explain it in natural language or focus on other parts or give code snippets for other frameworks. I understand the idea of optimizing a random input image with respect to a class prior and also the maths behind it given in the this paper, section 2, but I'm not able to implement it myself using TensorFlow.
From this SO question and the helpful comment by etarion, I now know that you can give a list of variables to the optimizer, while all other variables are untouched. However, when giving the optimizer a random image as a variable leads to
File "mnist_test.py", line 101, in main
optimizer2 = tf.train.AdamOptimizer(learning_rate).minimize(-cost, var_list=[rnd_img])
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 198, in minimize
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 309, in apply_gradients
(converted_grads_and_vars,))
ValueError: No gradients provided for any variable: ((None,<tensorflow.python.ops.variables.Variable object at 0x7feac1870410>),)
For testing purpose I used a stripped down MNIST example. I tried to keep it as short as possible while still being readable and executable:
def main():
# parameters
learning_rate = 0.001
train_batches = 1000
batch_size = 128
display_step = 50
# net parameters
n_input = 784 #28x28
n_classes = 10
keep_prob = 0.75
weights = {
'wc1': tf.Variable(tf.truncated_normal([5, 5, 1, 32])),
'wc2': tf.Variable(tf.truncated_normal([5, 5, 32, 64])),
'wd1': tf.Variable(tf.truncated_normal([7*7*64, 1024])),
'out': tf.Variable(tf.truncated_normal([1024, n_classes]))
}
biases = {
'bc1': tf.Variable(tf.constant(0.1, shape=[32])),
'bc2': tf.Variable(tf.constant(0.1, shape=[64])),
'bd1': tf.Variable(tf.constant(0.1, shape=[1024])),
'out': tf.Variable(tf.constant(0.1, shape=[n_classes]))
}
# tf inputs
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
dropout = tf.placeholder(tf.float32)
# create net
net = create_net(x, weights, biases, keep_prob)
# define loss
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(net, y))
# define optimizer
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
# evaluation
pred_correct = tf.equal(tf.argmax(net, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(pred_correct, tf.float32))
print "loading mnist data"
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
sess = tf.Session()
sess.run(tf.initialize_all_variables())
for i in xrange(train_batches):
batch_x, batch_y = mnist.train.next_batch(batch_size)
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, dropout: keep_prob})
if i % display_step == 0:
loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x, y: batch_y, dropout: 1.0})
print "batch: %i, loss: %.5f, accuracy: %.5f" % (i, loss, acc)
acc = sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels, dropout: 1.0})
print "test accuracy: %.5f" % (acc)
# ====== this is where the reconstruction begins =====
rnd_img = tf.Variable(tf.random_normal([1, n_input]))
one_hot = np.zeros(10)
one_hot[4] = 1;
# the next line causes the error
optimizer2 = tf.train.AdamOptimizer(learning_rate).minimize(-cost, var_list=[rnd_img])
for i in xrange(1000):
session.run(optimizer2, feed_dict={x: rnd_img, y: one_hot, dropout: 1.0})
sess.close()
if __name__ == "__main__":
main()
The helper functions I used:
def create_net(x, weights, biases, dropout):
x = tf.reshape(x, shape=[-1, 28, 28, 1])
conv1 = conv2d_relu(x, weights['wc1'], biases['bc1'])
conv1 = maxpool2d(conv1, 2)
conv2 = conv2d_relu(conv1, weights['wc2'], biases['bc2'])
conv2 = maxpool2d(conv2, 2)
fc1 = fullyconnected_relu(conv2, weights['wd1'], biases['bd1'])
fc1 = tf.nn.dropout(fc1, dropout)
out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
return out
def conv2d_relu(x, W, b, stride=1):
conv = tf.nn.conv2d(x, W, strides=[1, stride, stride, 1], padding='SAME')
conv = tf.nn.bias_add(conv, b)
return tf.nn.relu(conv)
def maxpool2d(x, k=2, stride=2, padding='VALID'):
return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, stride, stride, 1], padding=padding)
def fullyconnected_relu(x, W, b):
fc = tf.reshape(x, [-1, W.get_shape().as_list()[0]])
fc = tf.add(tf.matmul(fc, W), b)
fc = tf.nn.relu(fc)
I've found some sources saying that this error occurs when there is no path within the computation graph between the output and the variables to be optimize, but I don't see why this should be the case here.
My questions are:
Why isn't the optimizer able to apply any gradients?
Is this the right way to go in order to implement the visualization of a class?
Thanks in advance.
Edit:
Here is the complete code again, after incorporation of the accepted answer (for anyone who is interested). Anyway, the results are still not as expected, as the script basically produces random images after 100000 rounds of reconstruction. Ideas are welcome.
import tensorflow as tf
import numpy as np
import skimage.io
def conv2d_relu(x, W, b, stride=1):
conv = tf.nn.conv2d(x, W, strides=[1, stride, stride, 1], padding='SAME')
conv = tf.nn.bias_add(conv, b)
return tf.nn.relu(conv)
def maxpool2d(x, k=2, stride=2, padding='VALID'):
return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, stride, stride, 1], padding=padding)
def fullyconnected_relu(x, W, b):
fc = tf.reshape(x, [-1, W.get_shape().as_list()[0]])
fc = tf.add(tf.matmul(fc, W), b)
fc = tf.nn.relu(fc)
return fc;
def create_net(x, weights, biases, dropout):
x = tf.reshape(x, shape=[-1, 28, 28, 1])
conv1 = conv2d_relu(x, weights['wc1'], biases['bc1'])
conv1 = maxpool2d(conv1, 2)
conv2 = conv2d_relu(conv1, weights['wc2'], biases['bc2'])
conv2 = maxpool2d(conv2, 2)
fc1 = fullyconnected_relu(conv2, weights['wd1'], biases['bd1'])
fc1 = tf.nn.dropout(fc1, dropout)
out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
return out
def save_image(img_data, name):
img = img_data.reshape(28,28)
mi = np.min(img)
ma = np.max(img)
img = (img-mi)/(ma-mi)
skimage.io.imsave(name, img)
def main():
# parameters
learning_rate = 0.001
train_batches = 1000
batch_size = 100
display_step = 50
# net parameters
n_input = 784 #28x28
n_classes = 10
keep_prob = 0.75
weights = {
'wc1': tf.Variable(tf.truncated_normal([5, 5, 1, 32])),
'wc2': tf.Variable(tf.truncated_normal([5, 5, 32, 64])),
'wd1': tf.Variable(tf.truncated_normal([7*7*64, 1024])),
'out': tf.Variable(tf.truncated_normal([1024, n_classes]))
}
biases = {
'bc1': tf.Variable(tf.constant(0.1, shape=[32])),
'bc2': tf.Variable(tf.constant(0.1, shape=[64])),
'bd1': tf.Variable(tf.constant(0.1, shape=[1024])),
'out': tf.Variable(tf.constant(0.1, shape=[n_classes]))
}
# tf inputs
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
dropout = tf.placeholder(tf.float32)
# create net
net = create_net(x, weights, biases, dropout)
# define loss
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(net, y))
# define optimizer
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
# evaluation
pred_correct = tf.equal(tf.argmax(net, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(pred_correct, tf.float32))
print "loading mnist data"
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
sess = tf.Session()
sess.run(tf.initialize_all_variables())
for i in xrange(train_batches):
batch_x, batch_y = mnist.train.next_batch(batch_size)
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, dropout: keep_prob})
if i % display_step == 0:
loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x, y: batch_y, dropout: 1.0})
print "batch: %i, loss: %.5f, accuracy: %.5f" % (i, loss, acc)
acc = sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels, dropout: 1.0})
print "test accuracy: %.5f" % (acc)
# reconstruction part
rnd_img = tf.Variable(tf.random_normal([1, n_input]))
one_hot = np.zeros((1, 10))
one_hot[0,1] = 1;
net2 = create_net(rnd_img, weights, biases, dropout)
cost2 = -tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(net2, y))
optimizer2 = tf.train.AdamOptimizer(learning_rate).minimize(cost2, var_list=[rnd_img])
init_var_list = []
for var in tf.all_variables():
if(not tf.is_variable_initialized(var).eval(session=sess)):
init_var_list.append(var)
sess.run(tf.initialize_variables(init_var_list))
save_image(rnd_img.eval(sess), "bevor.tiff")
for i in xrange(100000):
_, loss = sess.run([optimizer2, cost2], feed_dict={y: one_hot, dropout: 1.0})
if(i%10000 == 0):
cur_img = rnd_img.eval(session=sess)
print "loss:", loss, "mi:", np.min(cur_img), "ma:", np.max(cur_img)
save_image(rnd_img.eval(sess), "after.tiff")
sess.close()
if __name__ == "__main__":
main()
Some explanation: After rebuilding the graph with the new input variable and optimizer, I had to initialize the new variables, i.e. the rnd_img and some helper variables used by the Adam optimizer, hence the loop over all_variables() and checking for initialization status. If somebody knows a more elegant way, let me know. Or maybe that's the reason why I don't get any results?
The rnd_img needs to part of the graph that you optimize. In your case, you just create a variable and tell the optimizer to optimize it, but the variable is not connected to the loss in the graph. What you can for example do is use another call to create_net with rnd_image instead of x (but using the same weights!), create the cost for that and then create a minimization op for that cost. Then for optimization you only feed in y.