Failed to get the value of tensor - tensorflow

When running the MNIST data set, I want to know what actually my model outputs during training the batch.Here is my code:(I haven't added the optimizer and the loss function):
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
INPUT_NODE = 784 # the total pixels of the input images
OUTPUT_NODE = 10 # the output varies from 0 to 9
LAYER_NODE = 500
BATCH_SIZE = 100
TRAINING_STEPS = 10
def inference(input_tensor, avg_class, weight1, biase1, weight2, biase2):
if avg_class == None:
layer = tf.nn.relu(tf.matmul(input_tensor, weight1) + biase1)
return tf.matmul(layer, weight2)+biase2
else:
layer = tf.nn.relu(tf.matmul(input_tensor, avg_class.average(weight1)) +
avg_class.average(biase1))
return tf.matmul(layer, avg_class.average(weight2)) + avg_class.average(biase2)
def train(mnist):
x = tf.placeholder(tf.float32, [None, INPUT_NODE], name = 'x-input')
y = tf.placeholder(tf.float32, [None, OUTPUT_NODE],name = 'y-input')
weight1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER_NODE], stddev = 0.1))
biase1 = tf.Variable(tf.constant(0.1, shape = [LAYER_NODE]))
weight2 = tf.Variable(tf.truncated_normal([LAYER_NODE, OUTPUT_NODE], stddev = 0.1))
biase2 = tf.Variable(tf.constant(0.1, shape = [OUTPUT_NODE]))
out = inference(x, None, weight1, biase1, weight2, biase2)
with tf.Session() as sess:
tf.global_variables_initializer().run()
validate_feed = {x:mnist.validation.images, y:mnist.validation.labels}
test_feed = {x:mnist.test.images, y:mnist.test.labels}
for i in range(TRAINING_STEPS):
xs, ys = mnist.train.next_batch(BATCH_SIZE)
sess.run(out, feed_dict= {x:xs, y:ys})
print(out)
def main(arg = None):
mnist = input_data.read_data_sets("/home/vincent/Tensorflow/MNIST/data/", one_hot = True)
train(mnist)
if __name__ == '__main__':
tf.app.run()
I try to print out:
Tensor("add_1:0", shape=(?, 10), dtype=float32)
If I want to know the value of out, what should I do?
I tried to print(out.eval()), and it raised error

out is a tensor object. If you want to get its value, replace
sess.run(out, feed_dict= {x:xs, y:ys})
print(out)
with
res_out=sess.run(out, feed_dict= {x:xs, y:ys})
print(res_out)

Related

How to make lstm/rnn focus more on certain parts of time series while less on other parts using tensorflow?

I have a time series prediction problem where most of the observed values (95%) are 0s while remaining values are non-zeros. How can I make use of RNN for this problem.
I want to predict surface flow from environmental data(air temperature, rainfall, humidity etc). We know surface flow is 0.0 for most of the time in an year. However, I also don't want to simply ignore 0s as the 0s represent the period of the year when when surface flow is 0.0. The image below shows possible observed output and three inputs. The three inputs here are just random but in reality they will be data like rainfall, humidity etc and these input data have some periodic pattern.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psutil
import tensorflow as tf
import sys
print(sys.version)
print('tensorflow version: ', tf.__version__)
#clean computation graph
tf.reset_default_graph()
tf.set_random_seed(777) # reproducibility
np.random.seed(0)
def MinMaxScaler(data):
numerator = data - np.min(data, 0)
denominator = np.max(data, 0) - np.min(data, 0)
# noise term prevents the zero division
return numerator / (denominator + 1e-7)
class generate_data(object):
def __init__(self, data_len, in_series, y_pred, seq_lengths, method='sum' ):
self.data_len = data_len
self.data = None
self.in_series = in_series #number of input series
self.y_pred = y_pred #number of final outputs from model
self.seq_lengths = seq_lengths
self.method = method
def _f(self, x):
y = 0
result = []
for _ in x:
result.append(y)
y += np.random.normal(scale=1)
return np.array(result)
def _runningMean(self, x, N):
return np.convolve(x, np.ones((N,))/N)[(N-1):]
def sine(self):
DATA = np.zeros((self.data_len, self.in_series))
xx = [None]
data_0 = np.sin(np.linspace(0, 20, self.data_len*self.in_series))
xx = data_0.reshape(self.data_len, self.in_series)
DATA[:,0: self.in_series] = xx
y = self._get_y(DATA)
return xx,y, DATA
def _get_y(self, xx):
if self.method=='sum':
yy = np.array([np.sum(xx[i,:]) for i in range(np.shape(xx)[0])])
elif self.method == 'mean':
yy = np.array([np.mean(xx[i,:]) for i in range(np.shape(xx)[0])])
elif self.method == 'self_mul':
yy = np.array([np.prod(xx[i,:]) for i in range(np.shape(xx)[0])])
elif self.method == 'mean_mirror':
yy = np.array([np.mean(xx[i,:]) for i in range(np.shape(xx)[0])])
return yy
def normalize(self, xx1,yy1):
yy = [None]*len(yy1)
YMinMax = {}
xx = MinMaxScaler(xx1)
for i in range(self.y_pred):
YMinMax['ymin_' + str(i)] = np.min(yy1[0])
YMinMax['ymax_' + str(i)] = np.max(yy1[0])
yy[i] = MinMaxScaler(yy1[0])
setattr(self, 'YMinMax', YMinMax)
return xx,yy
def create_dataset(self, xx, yy, percent_of_zeros):
'''creates a dataset consisting of windows for x and y data'''
dataX = self._build_input_windows(xx, self.seq_lengths)
if self.y_pred > 1:
pass
elif self.y_pred > 1 and self.seq_lengths != any(self.seq_lengths):
pass
else:
dataY = self._build_y_windows(yy[0] , self.seq_lengths)
indices = np.random.choice(np.arange(dataY.size), replace=False,
size=int(dataY.size * percent_of_zeros))
dataY[indices] = 0
return dataX, dataY
def _build_input_windows(self, time_series, seq_length):
dataX = []
for i in range(0, len(time_series) - seq_length):
_x = time_series[i:i + seq_length, :]
dataX.append(_x)
return np.array(dataX)
def _build_y_windows(self, iny, seq_length):
dataY = []
for i in range(0, len(iny) - seq_length):
_y = iny[i + seq_length, ] # Next close price
dataY.append(_y)
return np.array(dataY)
def TrainTestSplit(self, dataX, dataY, train_frac):
train_size = int(len(dataY) * train_frac)
trainX, testX = np.array(dataX[0:train_size]), np.array(dataX[train_size:len(dataX)])
trainY, testY = np.array(dataY[0:train_size]), np.array(dataY[train_size:len(dataY)])
trainY = trainY.reshape(len(trainY), 1)
testY = testY.reshape(len(testY), 1)
return trainX, trainY, testX, testY, train_size
#training/hyper parameters
tot_epochs = 500
batch_size = 16
learning_rate = 0.01
seq_lengths = 5 #sequence lengths/window size for RNN
rnn_inputs = 3 # no of inputs for RNN
y_pred = 1
data_length = 1005 #this can be overwritten or useless
gen_data = generate_data(data_length, rnn_inputs, y_pred, seq_lengths, 'sum')
xx,yy,data_1 = gen_data.sine()
# xx = abs(xx)
train_frac = 0.8
xx1,yy1 = gen_data.normalize(xx,[yy])
zeros = 0.96
dataX, dataY = gen_data.create_dataset(xx1,yy1, zeros)
trainX, trainY, testX, testY, train_size = gen_data.TrainTestSplit( dataX, dataY, train_frac)
keep_prob = tf.placeholder(tf.float32)
x_placeholders = tf.placeholder(tf.float32, [None, 5, 3])
Y = tf.placeholder(tf.float32, [None, 1])
plt.plot(dataY, '.', label='output')
plt.plot(xx[:,0], '.', label='input1')
plt.plot(xx[:,1], '.', label='input2')
plt.plot(xx[:,2], '.', label='input3')
plt.legend()
# build neural network
with tf.variable_scope('scope0'): #defining RNN
# cell = tf.contrib.rnn.BasicLSTMCell(num_units= 7, state_is_tuple=True, activation=tf.tanh)
cell = tf.keras.layers.LSTMCell(units = 128)
outputs1, _states = tf.nn.dynamic_rnn(cell, x_placeholders, dtype=tf.float32)
# Y_pred1 = tf.contrib.layers.fully_connected(outputs1[:, -1], 1, activation_fn=None)
Y_pred1 = tf.keras.layers.Dense(1)(outputs1[:,-1])
Y_pred = Y_pred1
## cost/loss
loss = tf.reduce_sum(tf.square(Y_pred - Y)) # sum of the squares
## optimizer
optimizer = tf.train.AdamOptimizer(learning_rate)
train = optimizer.minimize(loss)
#
## RMSE
targets = tf.placeholder(tf.float32, [None, 1])
predictions = tf.placeholder(tf.float32, [None, 1])
rmse = tf.sqrt(tf.reduce_mean(tf.square(targets - predictions)))
with tf.Session() as sess:
saver = tf.train.Saver(max_to_keep=41)
writer = tf.summary.FileWriter('./laos_2out/cnntest', sess.graph)
init = tf.global_variables_initializer()
sess.run(init)
# Training step
for epoch in range(tot_epochs):
total_batches = int(train_size / batch_size) ##total batches/ no. of steps in an epoch
#for batch in range(total_batches):
_, step_loss = sess.run([train, loss], feed_dict= {x_placeholders:trainX, Y:trainY, keep_prob:0.5} )
print('epoch: # {} loss: {}'.format(epoch, step_loss))
# # evaluating on test data
test_predict = sess.run(Y_pred, feed_dict= {x_placeholders:testX, Y:trainY, keep_prob:0.5} )
#evaluating on training data
train_predict = sess.run(Y_pred, feed_dict={x_placeholders:trainX, Y:trainY, keep_prob:0.5})
rmse_val = sess.run(rmse, feed_dict={targets: testY, predictions: test_predict})
print("RMSE: {}".format(rmse_val))
# Plot predictions
fig, (ax1,ax2) = plt.subplots(1,2, sharey=True)
fig.set_figwidth(14)
fig.set_figheight(5)
ax2.plot(testY, 'b', label='observed')
ax2.plot(test_predict, 'k', label='predicted')
ax2.legend(loc="best")
ax2.set_xlabel("Time Period")
ax2.set_title('Testing')
ax1.plot(trainY, 'b', label='observed')
ax1.plot(train_predict, 'k',label= 'predicted')
ax1.legend(loc="best")
ax1.set_xlabel("Time Period")
ax1.set_ylabel("discharge (cms)")
ax1.set_title('Training')
plt.show()
The problem is that while training, the model focuses on majority of values i.e. 0s and thus makes the predictions equal to 0s. How can I make the model focus on non-zero values (positive surface flow) while at the same time also consider 0s (when there is no surface flow). I have read about attention mechanism but have not understood that how I can implement it in such scenarios.

RNN use mean square error does not converge

I am learning RNN through https://medium.com/#erikhallstrm/hello-world-rnn-83cd7105b767. I change the loss function to mean square error and found it does not converge. The output is stuck at 0.5. Somehow, I feel the mistake is inside
midlosses = [tf.squeeze(logits)-tf.squeeze(labels) for logits, labels in zip(logits_series,labels_series)]
But I don't how. I am not familiar with datatype. This may be a silly question. In case I don't make myself clear, the full code is below:
from __future__ import print_function, division
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
num_epochs = 100
total_series_length = 50000
truncated_backprop_length = 15
state_size = 4
num_classes = 1
echo_step = 3
batch_size = 5
num_batches = total_series_length//batch_size//truncated_backprop_length
def generateData():
x = np.array(np.random.choice(2, total_series_length, p=[0.5, 0.5]))
y = np.roll(x, echo_step)
y[0:echo_step] = 0
x = x.reshape((batch_size, -1)) # The first index changing slowest, subseries as rows
y = y.reshape((batch_size, -1))
return (x, y)
tf.reset_default_graph()
batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])
batchY_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])
init_state = tf.placeholder(tf.float32, [batch_size, state_size])
W = tf.Variable(np.random.rand(state_size+1, state_size), dtype=tf.float32)
b = tf.Variable(np.zeros((1,state_size)), dtype=tf.float32)
W2 = tf.Variable(np.random.rand(state_size, num_classes),dtype=tf.float32)
b2 = tf.Variable(np.zeros((1,num_classes)), dtype=tf.float32)
# Unpack columns
inputs_series = tf.unstack(batchX_placeholder, axis=1)
labels_series = tf.unstack(batchY_placeholder, axis=1)
# Forward pass
current_state = init_state
states_series = []
for current_input in inputs_series:
current_input = tf.reshape(current_input, [batch_size, 1])
input_and_state_concatenated = tf.concat([current_input, current_state],axis=1) # Increasing number of columns
next_state = tf.tanh(tf.matmul(input_and_state_concatenated, W) + b) # Broadcasted addition
states_series.append(next_state)
current_state = next_state
logits_series = [tf.matmul(state, W2) + b2 for state in states_series]
#Loss function HERE
midlosses = [tf.squeeze(logits)-tf.squeeze(labels) for logits, labels in zip(logits_series,labels_series)]
losses = tf.square(midlosses)
total_loss = tf.reduce_mean(losses)
train_step = tf.train.AdagradOptimizer(0.3).minimize(total_loss)
with tf.Session() as sess:
sess.run(tf.initialize_all_variables())
loss_list = []
for epoch_idx in range(num_epochs):
x,y = generateData()
_current_state = np.zeros((batch_size, state_size))
print("New data, epoch", epoch_idx)
for batch_idx in range(num_batches):
start_idx = batch_idx * truncated_backprop_length
end_idx = start_idx + truncated_backprop_length
batchX = x[:,start_idx:end_idx]
batchY = y[:,start_idx:end_idx]
_total_loss, _train_step, _current_state,_logits_series,_midlosses = sess.run(
[total_loss, train_step, current_state,logits_series,midlosses],
feed_dict={
batchX_placeholder:batchX,
batchY_placeholder:batchY,
init_state:_current_state
})
loss_list.append(_total_loss)
if batch_idx%100 == 0:
print("Step",batch_idx, "Loss", _total_loss)
Just need to replace
logits_series = [tf.matmul(state, W2) + b2 for state in states_series]
by
logits_series = [tf.squeeze(tf.matmul(state, W2) + b2) for state in states_series] #Broadcasted addition
Problem can solved.

ValueError: Cannot feed value of shape (128, 28, 28) for Tensor 'Placeholder:0', which has shape '(?, 784)'

I am new to Tensorflow and Machine Learning and trying out CNN using Tensorflow with my custom input data. But I am getting the error attached below.
The Data or Image Size is 28x28 with 15 Labels.
I am not getting the numpy reshape thing in this script or the error.
Help is highly appreciated.
import tensorflow as tf
import os
import skimage.data
import numpy as np
import random
def load_data(data_directory):
directories = [d for d in os.listdir(data_directory)
if os.path.isdir(os.path.join(data_directory, d))]
labels = []
images = []
for d in directories:
label_directory = os.path.join(data_directory, d)
file_names = [os.path.join(label_directory, f)
for f in os.listdir(label_directory)
if f.endswith(".jpg")]
for f in file_names:
images.append(skimage.data.imread(f))
labels.append(d)
print(str(d)+' Completed')
return images, labels
ROOT_PATH = "H:\Testing\TrainingData"
train_data_directory = os.path.join(ROOT_PATH, "Training")
test_data_directory = os.path.join(ROOT_PATH, "Testing")
print('Loading Data...')
images, labels = load_data(train_data_directory)
print('Data has been Loaded')
n_classes = 15
training_examples = 10500
test_examples = 4500
batch_size = 128
x = tf.placeholder('float', [None, 784])
y = tf.placeholder('float')
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME')
def maxpool2d(x):
return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
def neural_network_model(x):
weights = {'W_Conv1':tf.Variable(tf.random_normal([5,5,1,32])),
'W_Conv2':tf.Variable(tf.random_normal([5,5,32,64])),
'W_FC':tf.Variable(tf.random_normal([7*7*64, 1024])),
'Output':tf.Variable(tf.random_normal([1024, n_classes]))}
biases = {'B_Conv1':tf.Variable(tf.random_normal([32])),
'B_Conv2':tf.Variable(tf.random_normal([64])),
'B_FC':tf.Variable(tf.random_normal([1024])),
'Output':tf.Variable(tf.random_normal([n_classes]))}
x = tf.reshape(x, shape=[-1,28,28,1])
conv1 = conv2d(x, weights['W_Conv1'])
conv1 = maxpool2d(conv1)
conv2 = conv2d(conv1, weights['W_Conv2'])
conv2 = maxpool2d(conv2)
fc = tf.reshape(conv2, [-1, 7*7*64])
fc = tf.nn.relu(tf.matmul(fc, weights['W_FC'])+biases['B_FC'])
output = tf.matmul(fc, weights['Output'])+biases['Output']
return output
def next_batch(num, data, labels):
idx = np.arange(0 , len(data))
np.random.shuffle(idx)
idx = idx[:num]
data_shuffle = [data[ i] for i in idx]
labels_shuffle = [labels[ i] for i in idx]
return np.asarray(data_shuffle), np.asarray(labels_shuffle)
def train_neural_network(x):
prediction = neural_network_model(x)
cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y) )
optimizer = tf.train.AdamOptimizer().minimize(cost)
hm_epochs = 10
with tf.Session() as sess:
# OLD:
#sess.run(tf.initialize_all_variables())
# NEW:
sess.run(tf.global_variables_initializer())
for epoch in range(hm_epochs):
epoch_loss = 0
for _ in range(int(training_examples/batch_size)):
epoch_x, epoch_y = next_batch(batch_size, images, labels)
_, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
epoch_loss += c
print('Epoch', epoch, 'completed out of',hm_epochs,'loss:',epoch_loss)
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
print('Accuracy:',accuracy.eval({x: images, y: labels}))
print('Training Neural Network...')
train_neural_network(x)
What am I doing wrong? What is needed to be fixed and how do I fix the shape of numpy array?
If you look closely, you'll see that you have two x placeholders:
x = tf.placeholder('float', [None, 784]) # global
...
x = tf.reshape(x, shape=[-1,28,28,1]) # in neural_network_model
One of them is in the function scope, hence not visible in train_neural_network, so tensorflow takes the one with [?, 784] shape. You should get rid of one of them.
Also note that your training data has the rank 3, i.e. [batch_size, 28, 28], so it's not directly compatible with any of those placeholders.
To feed it into the first x, take epoch_x.reshape([-1, 784]). For the second placeholder (once you make it visible), take epoch_x.reshape([-1, 28, 28, 1]).

Tensorflow: Can't overfit training data with batch size > 1

I coded a small RNN network with Tensorflow to return the total energy consumption given some parameters. There seem to be a problem in my code. It can't overfit the training data when I use a batch size > 1 (even with only 4 samples!). In the code below, the loss value reaches 0 when I set BatchSize to 1. However, by setting BatchSize to 2, the network fails to overfit and the loss value goes toward 12.500000 and gets stuck there forever.
I suspect this has something to do with LSTM states. I get the same problem if I don't update the state with each iteration. Or maybe the cost function? A help is appreciated. Thanks.
import tensorflow as tf
import numpy as np
import os
from utils import loadData
Epochs = 10000
LearningRate = 0.0001
MaxGradNorm = 5
SeqLen = 1
NChannels = 28
NClasses = 1
NLayers = 2
NUnits = 256
BatchSize = 1
NumSamples = 4
#################################################################
trainingFile = "./training.dat"
X_values, Y_values = loadData(trainingFile, SeqLen, NumSamples)
X = tf.placeholder(tf.float32, [BatchSize, SeqLen, NChannels], name='inputs')
Y = tf.placeholder(tf.float32, [BatchSize, SeqLen, NClasses], name='labels')
keep_prob = tf.placeholder(tf.float32, name='keep')
initializer = tf.contrib.layers.xavier_initializer()
Xin = tf.unstack(tf.transpose(X, perm=[1, 0, 2]))
lstm_layers = []
for i in range(NLayers):
lstm_layer = tf.nn.rnn_cell.LSTMCell(num_units=NUnits, initializer=initializer, use_peepholes=True, state_is_tuple=True)
dropout_layer = tf.contrib.rnn.DropoutWrapper(lstm_layer, output_keep_prob=keep_prob)
#[LSTM ---> DROPOUT] ---> [LSTM ---> DROPOUT] ---> etc...
lstm_layers.append(dropout_layer)
rnn = tf.nn.rnn_cell.MultiRNNCell(lstm_layers, state_is_tuple=True)
initial_state = rnn.zero_state(BatchSize, tf.float32)
outputs, final_state = tf.nn.static_rnn(rnn, Xin, dtype=tf.float32, initial_state=initial_state)
outputs = tf.transpose(outputs, [1,0,2])
outputs = tf.reshape(outputs, [-1, NUnits])
weight = tf.Variable(tf.truncated_normal([NUnits, NClasses]))
bias = tf.Variable(tf.constant(0.1, shape=[NClasses]))
prediction = tf.matmul(outputs, weight) + bias
prediction = tf.reshape(prediction, [BatchSize, SeqLen, NClasses])
cost = tf.reduce_sum(tf.pow(tf.subtract(prediction, Y), 2)) / (2 * BatchSize)
tvars = tf.trainable_variables()
grad, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), MaxGradNorm)
optimizer = tf.train.AdamOptimizer(learning_rate = LearningRate)
train_step = optimizer.apply_gradients(zip(grad, tvars))
sess = tf.Session()
sess.run(tf.global_variables_initializer())
iteration = 1
for e in range(0, Epochs):
train_loss = []
state = sess.run(initial_state)
for i in xrange(0, len(X_values), BatchSize):
x = X_values[i:i + BatchSize]
y = Y_values[i:i + BatchSize]
y = np.expand_dims(y, 2)
feed = {X : x, Y : y, keep_prob : 1.0, initial_state : state}
_ , loss, state, pred = sess.run([train_step, cost, final_state, prediction], feed_dict = feed)
train_loss.append(loss)
iteration += 1
print("Epoch: {}/{}".format(e, Epochs), "Iteration: {:d}".format(iteration), "Train average rmse: {:6f}".format(np.mean(train_loss)))
Normalizing the input data solved the problem.

Tensorflow: TypeError: get_variable() got multiple values for keyword argument 'name'

I am training the "Show and tell" model using tensorflow in which the model automatically generates the captions of the images. How ever I am getting this error.
This is the traceback:
TypeError Traceback (most recent call
last)
<ipython-input-14-b6da0a27b701> in <module>()
1 try:
2 #train(.001,False,False) #train from scratch
----> 3 train(.001,True,True) #continue training from pretrained weights #epoch500
4 #train(.001) #train from previously saved weights
5 except KeyboardInterrupt:
<ipython-input-13-39693d0edd0a> in train(learning_rate, continue_training, transfer)
23 n_words = len(wordtoix)
24 maxlen = np.max( [x for x in map(lambda x: len(x.split(' ')), captions) ] )
---> 25 caption_generator = Caption_Generator(dim_in, dim_hidden, dim_embed, batch_size, maxlen+2, n_words, init_b)
26
27 loss, image, sentence, mask = caption_generator.build_model()
<ipython-input-12-1b31c4175b3a> in __init__(self, dim_in, dim_embed, dim_hidden, batch_size, n_lstm_steps, n_words, init_b)
11 # declare the variables to be used for our word embeddings
12 with tf.device("/cpu:0"):
---> 13 self.word_embedding = tf.get_variable(tf.random_uniform([self.n_words, self.dim_embed], -0.1, 0.1), name='word_embedding')
14
15 self.embedding_bias = tf.get_variable(tf.zeros([dim_embed]), name='embedding_bias')
TypeError: get_variable() got multiple values for keyword argument 'name'
The problem might be that I am passing some extra arguments to the get_variable initializer but I unable to trace it where this problem is occurring.
Here is the code:
def get_data(annotation_path, feature_path):
annotations = pd.read_table(annotation_path, sep='\t', header=None, names=['image', 'caption'])
return np.load(feature_path,'r'), annotations['caption'].values
def preProBuildWordVocab(sentence_iterator, word_count_threshold=30): # function from Andre Karpathy's NeuralTalk
print('preprocessing %d word vocab' % (word_count_threshold, ))
word_counts = {}
nsents = 0
for sent in sentence_iterator:
nsents += 1
for w in sent.lower().split(' '):
word_counts[w] = word_counts.get(w, 0) + 1
vocab = [w for w in word_counts if word_counts[w] >= word_count_threshold]
print('preprocessed words %d -> %d' % (len(word_counts), len(vocab)))
ixtoword = {}
ixtoword[0] = '.'
wordtoix = {}
wordtoix['#START#'] = 0
ix = 1
for w in vocab:
wordtoix[w] = ix
ixtoword[ix] = w
ix += 1
word_counts['.'] = nsents
bias_init_vector = np.array([1.0*word_counts[ixtoword[i]] for i in ixtoword])
bias_init_vector /= np.sum(bias_init_vector)
bias_init_vector = np.log(bias_init_vector)
bias_init_vector -= np.max(bias_init_vector)
return wordtoix, ixtoword, bias_init_vector.astype(np.float32)
class Caption_Generator():
def __init__(self, dim_in, dim_embed, dim_hidden, batch_size, n_lstm_steps, n_words, init_b):
self.dim_in = dim_in
self.dim_embed = dim_embed
self.dim_hidden = dim_hidden
self.batch_size = batch_size
self.n_lstm_steps = n_lstm_steps
self.n_words = n_words
# declare the variables to be used for our word embeddings
with tf.device("/cpu:0"):
self.word_embedding = tf.get_variable(tf.random_uniform([self.n_words, self.dim_embed], -0.1, 0.1), name='word_embedding')
self.embedding_bias = tf.get_variable(tf.zeros([dim_embed]), name='embedding_bias')
# declare the LSTM itself
self.lstm = tf.contrib.rnn.BasicLSTMCell(dim_hidden)
# declare the variables to be used to embed the image feature embedding to the word embedding space
self.img_embedding = tf.get_variable(tf.random_uniform([dim_in, dim_hidden], -0.1, 0.1), name='img_embedding')
self.img_embedding_bias = tf.get_variable(tf.zeros([dim_hidden]), name='img_embedding_bias')
# declare the variables to go from an LSTM output to a word encoding output
self.word_encoding = tf.get_variable(tf.random_uniform([dim_hidden, n_words], -0.1, 0.1), name='word_encoding')
# initialize this bias variable from the preProBuildWordVocab output
self.word_encoding_bias = tf.get_variable(init_b, name='word_encoding_bias')
def build_model(self):
# declaring the placeholders for our extracted image feature vectors, our caption, and our mask
# (describes how long our caption is with an array of 0/1 values of length `maxlen`
img = tf.placeholder(tf.float32, [self.batch_size, self.dim_in])
caption_placeholder = tf.placeholder(tf.int32, [self.batch_size, self.n_lstm_steps])
mask = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps])
# getting an initial LSTM embedding from our image_imbedding
image_embedding = tf.matmul(img, self.img_embedding) + self.img_embedding_bias
# setting initial state of our LSTM
state = self.lstm.zero_state(self.batch_size, dtype=tf.float32)
total_loss = 0.0
with tf.variable_scope("RNN"):
for i in range(self.n_lstm_steps):
if i > 0:
#if this isn’t the first iteration of our LSTM we need to get the word_embedding corresponding
# to the (i-1)th word in our caption
with tf.device("/cpu:0"):
current_embedding = tf.nn.embedding_lookup(self.word_embedding, caption_placeholder[:,i-1]) + self.embedding_bias
else:
#if this is the first iteration of our LSTM we utilize the embedded image as our input
current_embedding = image_embedding
if i > 0:
# allows us to reuse the LSTM tensor variable on each iteration
tf.get_variable_scope().reuse_variables()
out, state = self.lstm(current_embedding, state)
#out, state = self.tf.nn.dynamic_rnn(current_embedding, state)
if i > 0:
#get the one-hot representation of the next word in our caption
labels = tf.expand_dims(caption_placeholder[:, i], 1)
ix_range=tf.range(0, self.batch_size, 1)
ixs = tf.expand_dims(ix_range, 1)
concat = tf.concat([ixs, labels],1)
onehot = tf.sparse_to_dense(
concat, tf.stack([self.batch_size, self.n_words]), 1.0, 0.0)
#perform a softmax classification to generate the next word in the caption
logit = tf.matmul(out, self.word_encoding) + self.word_encoding_bias
xentropy = tf.nn.softmax_cross_entropy_with_logits(logits=logit, labels=onehot)
xentropy = xentropy * mask[:,i]
loss = tf.reduce_sum(xentropy)
total_loss += loss
total_loss = total_loss / tf.reduce_sum(mask[:,1:])
return total_loss, img, caption_placeholder, mask
### Parameters ###
dim_embed = 256
dim_hidden = 256
dim_in = 4096
batch_size = 128
momentum = 0.9
n_epochs = 150
def train(learning_rate=0.001, continue_training=False, transfer=True):
tf.reset_default_graph()
feats, captions = get_data(annotation_path, feature_path)
wordtoix, ixtoword, init_b = preProBuildWordVocab(captions)
np.save('data/ixtoword', ixtoword)
index = (np.arange(len(feats)).astype(int))
np.random.shuffle(index)
sess = tf.InteractiveSession()
n_words = len(wordtoix)
maxlen = np.max( [x for x in map(lambda x: len(x.split(' ')), captions) ] )
caption_generator = Caption_Generator(dim_in, dim_hidden, dim_embed, batch_size, maxlen+2, n_words, init_b)
loss, image, sentence, mask = caption_generator.build_model()
saver = tf.train.Saver(max_to_keep=100)
global_step=tf.Variable(0,trainable=False)
learning_rate = tf.train.exponential_decay(learning_rate, global_step,
int(len(index)/batch_size), 0.95)
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
tf.global_variables_initializer().run()
if continue_training:
if not transfer:
saver.restore(sess,tf.train.latest_checkpoint(model_path))
else:
saver.restore(sess,tf.train.latest_checkpoint(model_path_transfer))
losses=[]
for epoch in range(n_epochs):
for start, end in zip( range(0, len(index), batch_size), range(batch_size, len(index), batch_size)):
current_feats = feats[index[start:end]]
current_captions = captions[index[start:end]]
current_caption_ind = [x for x in map(lambda cap: [wordtoix[word] for word in cap.lower().split(' ')[:-1] if word in wordtoix], current_captions)]
current_caption_matrix = sequence.pad_sequences(current_caption_ind, padding='post', maxlen=maxlen+1)
current_caption_matrix = np.hstack( [np.full( (len(current_caption_matrix),1), 0), current_caption_matrix] )
current_mask_matrix = np.zeros((current_caption_matrix.shape[0], current_caption_matrix.shape[1]))
nonzeros = np.array([x for x in map(lambda x: (x != 0).sum()+2, current_caption_matrix )])
for ind, row in enumerate(current_mask_matrix):
row[:nonzeros[ind]] = 1
_, loss_value = sess.run([train_op, loss], feed_dict={
image: current_feats.astype(np.float32),
sentence : current_caption_matrix.astype(np.int32),
mask : current_mask_matrix.astype(np.float32)
})
print("Current Cost: ", loss_value, "\t Epoch {}/{}".format(epoch, n_epochs), "\t Iter {}/{}".format(start,len(feats)))
print("Saving the model from epoch: ", epoch)
saver.save(sess, os.path.join(model_path, 'model'), global_step=epoch)
try:
#train(.001,False,False) #train from scratch
train(.001,True,True) #continue training from pretrained weights #epoch500
#train(.001) #train from previously saved weights
except KeyboardInterrupt:
print('Exiting Training')
In your constructor, try
self.word_embedding = tf.get_variable("word_embedding", initializer=tf.random_uniform([self.n_words, self.dim_embed], -0.1, 0.1))
The thing is, the first position argument is name and you have the initializer there instead, and then you again define the name, hence the error.
You need to make likewise changes everywhere you use tf.get_variable