Tensorflow is completely new to me, my previous work was mostly in ML, and training keras models.I am trying to replicate this version of GAN by Dr.Jinsung Yoon. I am trying to replicate with my own data, for that I need to figure out how to restore model and generate new data. Below is the main part of code running the GAN model
"""Time-series Generative Adversarial Networks (TimeGAN) Codebase.
Reference: Jinsung Yoon, Daniel Jarrett, Mihaela van der Schaar,
"Time-series Generative Adversarial Networks,"
Neural Information Processing Systems (NeurIPS), 2019.
Paper link: https://papers.nips.cc/paper/8789-time-series-generative-adversarial-networks
Last updated Date: April 24th 2020
Code author: Jinsung Yoon (jsyoon0823#gmail.com)
-----------------------------
timegan.py
Note: Use original data as training set to generater synthetic data (time-series)
"""
# Necessary Packages
import tensorflow as tf
import numpy as np
from utils import extract_time, rnn_cell, random_generator, batch_generator
def timegan (ori_data, parameters):
"""TimeGAN function.
Use original data as training set to generater synthetic data (time-series)
Args:
- ori_data: original time-series data
- parameters: TimeGAN network parameters
Returns:
- generated_data: generated time-series data
"""
# Initialization on the Graph
tf.compat.v1.reset_default_graph()
# Basic Parameters
no, seq_len, dim = np.asarray(ori_data).shape
# Maximum sequence length and each sequence length
ori_time, max_seq_len = extract_time(ori_data)
def MinMaxScaler(data):
"""Min-Max Normalizer.
Args:
- data: raw data
Returns:
- norm_data: normalized data
- min_val: minimum values (for renormalization)
- max_val: maximum values (for renormalization)
"""
min_val = np.min(np.min(data, axis = 0), axis = 0)
data = data - min_val
max_val = np.max(np.max(data, axis = 0), axis = 0)
norm_data = data / (max_val + 1e-7)
return norm_data, min_val, max_val
# Normalization
ori_data, min_val, max_val = MinMaxScaler(ori_data)
## Build a RNN networks
# Network Parameters
hidden_dim = parameters['hidden_dim']
num_layers = parameters['num_layer']
iterations = parameters['iterations']
batch_size = parameters['batch_size']
module_name = parameters['module']
z_dim = dim
gamma = 1
# Input place holders
X = tf.compat.v1.placeholder(tf.float32, [None, max_seq_len, dim], name = "myinput_x")
Z = tf.compat.v1.placeholder(tf.float32, [None, max_seq_len, z_dim], name = "myinput_z")
T = tf.compat.v1.placeholder(tf.int32, [None], name = "myinput_t")
def embedder (X, T):
"""Embedding network between original feature space to latent space.
Args:
- X: input time-series features
- T: input time information
Returns:
- H: embeddings
"""
with tf.compat.v1.variable_scope("embedder", reuse = tf.compat.v1.AUTO_REUSE):
e_cell = tf.compat.v1.nn.rnn_cell.MultiRNNCell([rnn_cell(module_name, hidden_dim) for _ in range(num_layers)])
e_outputs, e_last_states = tf.compat.v1.nn.dynamic_rnn(e_cell, X, dtype=tf.float32, sequence_length = T)
H = tf.compat.v1.layers.dense(e_outputs, hidden_dim, activation=tf.nn.sigmoid)
return H
def recovery (H, T):
"""Recovery network from latent space to original space.
Args:
- H: latent representation
- T: input time information
Returns:
- X_tilde: recovered data
"""
with tf.compat.v1.variable_scope("recovery", reuse = tf.compat.v1.AUTO_REUSE):
r_cell = tf.compat.v1.nn.rnn_cell.MultiRNNCell([rnn_cell(module_name, hidden_dim) for _ in range(num_layers)])
r_outputs, r_last_states = tf.compat.v1.nn.dynamic_rnn(r_cell, H, dtype=tf.float32, sequence_length = T)
X_tilde = tf.compat.v1.layers.dense(r_outputs, dim, activation=tf.nn.sigmoid)
return X_tilde
def generator (Z, T):
"""Generator function: Generate time-series data in latent space.
Args:
- Z: random variables
- T: input time information
Returns:
- E: generated embedding
"""
with tf.compat.v1.variable_scope("generator", reuse = tf.compat.v1.AUTO_REUSE):
e_cell = tf.compat.v1.nn.rnn_cell.MultiRNNCell([rnn_cell(module_name, hidden_dim) for _ in range(num_layers)])
e_outputs, e_last_states = tf.compat.v1.nn.dynamic_rnn(e_cell, Z, dtype=tf.float32, sequence_length = T)
E = tf.compat.v1.layers.dense(e_outputs, hidden_dim, activation=tf.nn.sigmoid)
return E
def supervisor (H, T):
"""Generate next sequence using the previous sequence.
Args:
- H: latent representation
- T: input time information
Returns:
- S: generated sequence based on the latent representations generated by the generator
"""
with tf.compat.v1.variable_scope("supervisor", reuse = tf.compat.v1.AUTO_REUSE):
e_cell = tf.compat.v1.nn.rnn_cell.MultiRNNCell([rnn_cell(module_name, hidden_dim) for _ in range(num_layers-1)])
e_outputs, e_last_states = tf.compat.v1.nn.dynamic_rnn(e_cell, H, dtype=tf.float32, sequence_length = T)
S = tf.compat.v1.layers.dense(e_outputs, hidden_dim, activation=tf.nn.sigmoid)
return S
def discriminator (H, T):
"""Discriminate the original and synthetic time-series data.
Args:
- H: latent representation
- T: input time information
Returns:
- Y_hat: classification results between original and synthetic time-series
"""
with tf.compat.v1.variable_scope("discriminator", reuse = tf.compat.v1.AUTO_REUSE):
d_cell = tf.compat.v1.nn.rnn_cell.MultiRNNCell([rnn_cell(module_name, hidden_dim) for _ in range(num_layers)])
d_outputs, d_last_states = tf.compat.v1.nn.dynamic_rnn(d_cell, H, dtype=tf.float32, sequence_length = T)
Y_hat = tf.compat.v1.layers.dense(d_outputs, 1, activation=None)
return Y_hat
# Embedder & Recovery
H = embedder(X, T)
X_tilde = recovery(H, T)
# Generator
E_hat = generator(Z, T)
H_hat = supervisor(E_hat, T)
H_hat_supervise = supervisor(H, T)
# Synthetic data
X_hat = recovery(H_hat, T)
# Discriminator
Y_fake = discriminator(H_hat, T)
Y_real = discriminator(H, T)
Y_fake_e = discriminator(E_hat, T)
# Variables
e_vars = [v for v in tf.compat.v1.trainable_variables() if v.name.startswith('embedder')]
r_vars = [v for v in tf.compat.v1.trainable_variables() if v.name.startswith('recovery')]
g_vars = [v for v in tf.compat.v1.trainable_variables() if v.name.startswith('generator')]
s_vars = [v for v in tf.compat.v1.trainable_variables() if v.name.startswith('supervisor')]
d_vars = [v for v in tf.compat.v1.trainable_variables() if v.name.startswith('discriminator')]
# Discriminator loss
D_loss_real = tf.compat.v1.losses.sigmoid_cross_entropy(tf.ones_like(Y_real), Y_real)
D_loss_fake = tf.compat.v1.losses.sigmoid_cross_entropy(tf.zeros_like(Y_fake), Y_fake)
D_loss_fake_e = tf.compat.v1.losses.sigmoid_cross_entropy(tf.zeros_like(Y_fake_e), Y_fake_e)
D_loss = D_loss_real + D_loss_fake + gamma * D_loss_fake_e
# Generator loss
# 1. Adversarial loss
G_loss_U = tf.compat.v1.losses.sigmoid_cross_entropy(tf.ones_like(Y_fake), Y_fake)
G_loss_U_e = tf.compat.v1.losses.sigmoid_cross_entropy(tf.ones_like(Y_fake_e), Y_fake_e)
# 2. Supervised loss
G_loss_S = tf.compat.v1.losses.mean_squared_error(H[:,1:,:], H_hat_supervise[:,:-1,:])
# 3. Two Momments
G_loss_V1 = tf.compat.v1.reduce_mean(tf.abs(tf.sqrt(tf.nn.moments(X_hat,[0])[1] + 1e-6) - tf.sqrt(tf.nn.moments(X,[0])[1] + 1e-6)))
G_loss_V2 = tf.compat.v1.reduce_mean(tf.abs((tf.nn.moments(X_hat,[0])[0]) - (tf.nn.moments(X,[0])[0])))
G_loss_V = G_loss_V1 + G_loss_V2
# 4. Summation
G_loss = G_loss_U + gamma * G_loss_U_e + 100 * tf.sqrt(G_loss_S) + 100*G_loss_V
# Embedder network loss
E_loss_T0 = tf.compat.v1.losses.mean_squared_error(X, X_tilde)
E_loss0 = 10*tf.math.sqrt(E_loss_T0)
E_loss = E_loss0 + 0.1*G_loss_S
# optimizer
E0_solver = tf.compat.v1.train.AdamOptimizer().minimize(E_loss0, var_list = [e_vars,r_vars])
E_solver = tf.compat.v1.train.AdamOptimizer().minimize(E_loss, var_list = [e_vars , r_vars])
D_solver = tf.compat.v1.train.AdamOptimizer().minimize(D_loss, var_list = [d_vars])
G_solver = tf.compat.v1.train.AdamOptimizer().minimize(G_loss, var_list = [g_vars , s_vars])
GS_solver = tf.compat.v1.train.AdamOptimizer().minimize(G_loss_S, var_list = [g_vars , s_vars])
## TimeGAN training
saver = tf.compat.v1.train.Saver()
sess = tf.compat.v1.Session()
sess.run(tf.compat.v1.global_variables_initializer())
saver.save(sess, 'my_test_model')
# 1. Embedding network training
print('Start Embedding Network Training')
for itt in range(iterations):
# Set mini-batch
X_mb, T_mb = batch_generator(ori_data, ori_time, batch_size)
# Train embedder
_, step_e_loss = sess.run([E0_solver, E_loss_T0], feed_dict={X: X_mb, T: T_mb})
# Checkpoint
if itt % 1000 == 0:
print('step: '+ str(itt) + '/' + str(iterations) + ', e_loss: ' + str(np.round(np.sqrt(step_e_loss),4)) )
print('Finish Embedding Network Training')
# 2. Training only with supervised loss
print('Start Training with Supervised Loss Only')
for itt in range(iterations):
# Set mini-batch
X_mb, T_mb = batch_generator(ori_data, ori_time, batch_size)
# Random vector generation
Z_mb = random_generator(batch_size, z_dim, T_mb, max_seq_len)
# Train generator
_, step_g_loss_s = sess.run([GS_solver, G_loss_S], feed_dict={Z: Z_mb, X: X_mb, T: T_mb})
# Checkpoint
if itt % 1000 == 0:
print('step: '+ str(itt) + '/' + str(iterations) +', s_loss: ' + str(np.round(np.sqrt(step_g_loss_s),4)) )
print('Finish Training with Supervised Loss Only')
# 3. Joint Training
print('Start Joint Training')
for itt in range(iterations):
# Generator training (twice more than discriminator training)
for kk in range(2):
# Set mini-batch
X_mb, T_mb = batch_generator(ori_data, ori_time, batch_size)
# Random vector generation
Z_mb = random_generator(batch_size, z_dim, T_mb, max_seq_len)
# Train generator
_, step_g_loss_u, step_g_loss_s, step_g_loss_v = sess.run([G_solver, G_loss_U, G_loss_S, G_loss_V], feed_dict={Z: Z_mb, X: X_mb, T: T_mb})
# Train embedder
_, step_e_loss_t0 = sess.run([E_solver, E_loss_T0], feed_dict={Z: Z_mb, X: X_mb, T: T_mb})
# Discriminator training
# Set mini-batch
X_mb, T_mb = batch_generator(ori_data, ori_time, batch_size)
# Random vector generation
Z_mb = random_generator(batch_size, z_dim, T_mb, max_seq_len)
# Check discriminator loss before updating
check_d_loss = sess.run(D_loss, feed_dict={X: X_mb, T: T_mb, Z: Z_mb})
# Train discriminator (only when the discriminator does not work well)
if (check_d_loss > 0.15):
_, step_d_loss = sess.run([D_solver, D_loss], feed_dict={X: X_mb, T: T_mb, Z: Z_mb})
# Print multiple checkpoints
if itt % 1000 == 0:
print('step: '+ str(itt) + '/' + str(iterations) +
', d_loss: ' + str(np.round(step_d_loss,4)) +
', g_loss_u: ' + str(np.round(step_g_loss_u,4)) +
', g_loss_s: ' + str(np.round(np.sqrt(step_g_loss_s),4)) +
', g_loss_v: ' + str(np.round(step_g_loss_v,4)) +
', e_loss_t0: ' + str(np.round(np.sqrt(step_e_loss_t0),4)) )
print('Finish Joint Training')
## Synthetic data generation
Z_mb = random_generator(no, z_dim, ori_time, max_seq_len)
generated_data_curr = sess.run(X_hat, feed_dict={Z: Z_mb, X: ori_data, T: ori_time})
generated_data = list()
for i in range(no):
temp = generated_data_curr[i,:ori_time[i],:]
generated_data.append(temp)
# Renormalization
generated_data = generated_data * max_val
generated_data = generated_data + min_val
return generated_data
Now, after saving the session, how to restore the model, and generate new data?
What variables do I need to callback, to restore the model and run something like .sample or .predict. Thanks!
Related
I have a time series prediction problem where most of the observed values (95%) are 0s while remaining values are non-zeros. How can I make use of RNN for this problem.
I want to predict surface flow from environmental data(air temperature, rainfall, humidity etc). We know surface flow is 0.0 for most of the time in an year. However, I also don't want to simply ignore 0s as the 0s represent the period of the year when when surface flow is 0.0. The image below shows possible observed output and three inputs. The three inputs here are just random but in reality they will be data like rainfall, humidity etc and these input data have some periodic pattern.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psutil
import tensorflow as tf
import sys
print(sys.version)
print('tensorflow version: ', tf.__version__)
#clean computation graph
tf.reset_default_graph()
tf.set_random_seed(777) # reproducibility
np.random.seed(0)
def MinMaxScaler(data):
numerator = data - np.min(data, 0)
denominator = np.max(data, 0) - np.min(data, 0)
# noise term prevents the zero division
return numerator / (denominator + 1e-7)
class generate_data(object):
def __init__(self, data_len, in_series, y_pred, seq_lengths, method='sum' ):
self.data_len = data_len
self.data = None
self.in_series = in_series #number of input series
self.y_pred = y_pred #number of final outputs from model
self.seq_lengths = seq_lengths
self.method = method
def _f(self, x):
y = 0
result = []
for _ in x:
result.append(y)
y += np.random.normal(scale=1)
return np.array(result)
def _runningMean(self, x, N):
return np.convolve(x, np.ones((N,))/N)[(N-1):]
def sine(self):
DATA = np.zeros((self.data_len, self.in_series))
xx = [None]
data_0 = np.sin(np.linspace(0, 20, self.data_len*self.in_series))
xx = data_0.reshape(self.data_len, self.in_series)
DATA[:,0: self.in_series] = xx
y = self._get_y(DATA)
return xx,y, DATA
def _get_y(self, xx):
if self.method=='sum':
yy = np.array([np.sum(xx[i,:]) for i in range(np.shape(xx)[0])])
elif self.method == 'mean':
yy = np.array([np.mean(xx[i,:]) for i in range(np.shape(xx)[0])])
elif self.method == 'self_mul':
yy = np.array([np.prod(xx[i,:]) for i in range(np.shape(xx)[0])])
elif self.method == 'mean_mirror':
yy = np.array([np.mean(xx[i,:]) for i in range(np.shape(xx)[0])])
return yy
def normalize(self, xx1,yy1):
yy = [None]*len(yy1)
YMinMax = {}
xx = MinMaxScaler(xx1)
for i in range(self.y_pred):
YMinMax['ymin_' + str(i)] = np.min(yy1[0])
YMinMax['ymax_' + str(i)] = np.max(yy1[0])
yy[i] = MinMaxScaler(yy1[0])
setattr(self, 'YMinMax', YMinMax)
return xx,yy
def create_dataset(self, xx, yy, percent_of_zeros):
'''creates a dataset consisting of windows for x and y data'''
dataX = self._build_input_windows(xx, self.seq_lengths)
if self.y_pred > 1:
pass
elif self.y_pred > 1 and self.seq_lengths != any(self.seq_lengths):
pass
else:
dataY = self._build_y_windows(yy[0] , self.seq_lengths)
indices = np.random.choice(np.arange(dataY.size), replace=False,
size=int(dataY.size * percent_of_zeros))
dataY[indices] = 0
return dataX, dataY
def _build_input_windows(self, time_series, seq_length):
dataX = []
for i in range(0, len(time_series) - seq_length):
_x = time_series[i:i + seq_length, :]
dataX.append(_x)
return np.array(dataX)
def _build_y_windows(self, iny, seq_length):
dataY = []
for i in range(0, len(iny) - seq_length):
_y = iny[i + seq_length, ] # Next close price
dataY.append(_y)
return np.array(dataY)
def TrainTestSplit(self, dataX, dataY, train_frac):
train_size = int(len(dataY) * train_frac)
trainX, testX = np.array(dataX[0:train_size]), np.array(dataX[train_size:len(dataX)])
trainY, testY = np.array(dataY[0:train_size]), np.array(dataY[train_size:len(dataY)])
trainY = trainY.reshape(len(trainY), 1)
testY = testY.reshape(len(testY), 1)
return trainX, trainY, testX, testY, train_size
#training/hyper parameters
tot_epochs = 500
batch_size = 16
learning_rate = 0.01
seq_lengths = 5 #sequence lengths/window size for RNN
rnn_inputs = 3 # no of inputs for RNN
y_pred = 1
data_length = 1005 #this can be overwritten or useless
gen_data = generate_data(data_length, rnn_inputs, y_pred, seq_lengths, 'sum')
xx,yy,data_1 = gen_data.sine()
# xx = abs(xx)
train_frac = 0.8
xx1,yy1 = gen_data.normalize(xx,[yy])
zeros = 0.96
dataX, dataY = gen_data.create_dataset(xx1,yy1, zeros)
trainX, trainY, testX, testY, train_size = gen_data.TrainTestSplit( dataX, dataY, train_frac)
keep_prob = tf.placeholder(tf.float32)
x_placeholders = tf.placeholder(tf.float32, [None, 5, 3])
Y = tf.placeholder(tf.float32, [None, 1])
plt.plot(dataY, '.', label='output')
plt.plot(xx[:,0], '.', label='input1')
plt.plot(xx[:,1], '.', label='input2')
plt.plot(xx[:,2], '.', label='input3')
plt.legend()
# build neural network
with tf.variable_scope('scope0'): #defining RNN
# cell = tf.contrib.rnn.BasicLSTMCell(num_units= 7, state_is_tuple=True, activation=tf.tanh)
cell = tf.keras.layers.LSTMCell(units = 128)
outputs1, _states = tf.nn.dynamic_rnn(cell, x_placeholders, dtype=tf.float32)
# Y_pred1 = tf.contrib.layers.fully_connected(outputs1[:, -1], 1, activation_fn=None)
Y_pred1 = tf.keras.layers.Dense(1)(outputs1[:,-1])
Y_pred = Y_pred1
## cost/loss
loss = tf.reduce_sum(tf.square(Y_pred - Y)) # sum of the squares
## optimizer
optimizer = tf.train.AdamOptimizer(learning_rate)
train = optimizer.minimize(loss)
#
## RMSE
targets = tf.placeholder(tf.float32, [None, 1])
predictions = tf.placeholder(tf.float32, [None, 1])
rmse = tf.sqrt(tf.reduce_mean(tf.square(targets - predictions)))
with tf.Session() as sess:
saver = tf.train.Saver(max_to_keep=41)
writer = tf.summary.FileWriter('./laos_2out/cnntest', sess.graph)
init = tf.global_variables_initializer()
sess.run(init)
# Training step
for epoch in range(tot_epochs):
total_batches = int(train_size / batch_size) ##total batches/ no. of steps in an epoch
#for batch in range(total_batches):
_, step_loss = sess.run([train, loss], feed_dict= {x_placeholders:trainX, Y:trainY, keep_prob:0.5} )
print('epoch: # {} loss: {}'.format(epoch, step_loss))
# # evaluating on test data
test_predict = sess.run(Y_pred, feed_dict= {x_placeholders:testX, Y:trainY, keep_prob:0.5} )
#evaluating on training data
train_predict = sess.run(Y_pred, feed_dict={x_placeholders:trainX, Y:trainY, keep_prob:0.5})
rmse_val = sess.run(rmse, feed_dict={targets: testY, predictions: test_predict})
print("RMSE: {}".format(rmse_val))
# Plot predictions
fig, (ax1,ax2) = plt.subplots(1,2, sharey=True)
fig.set_figwidth(14)
fig.set_figheight(5)
ax2.plot(testY, 'b', label='observed')
ax2.plot(test_predict, 'k', label='predicted')
ax2.legend(loc="best")
ax2.set_xlabel("Time Period")
ax2.set_title('Testing')
ax1.plot(trainY, 'b', label='observed')
ax1.plot(train_predict, 'k',label= 'predicted')
ax1.legend(loc="best")
ax1.set_xlabel("Time Period")
ax1.set_ylabel("discharge (cms)")
ax1.set_title('Training')
plt.show()
The problem is that while training, the model focuses on majority of values i.e. 0s and thus makes the predictions equal to 0s. How can I make the model focus on non-zero values (positive surface flow) while at the same time also consider 0s (when there is no surface flow). I have read about attention mechanism but have not understood that how I can implement it in such scenarios.
I am training the "Show and tell" model using tensorflow in which the model automatically generates the captions of the images. How ever I am getting this error.
This is the traceback:
------------------------------------------------------------------------
---
ValueError Traceback (most recent call
last)
<ipython-input-36-b6da0a27b701> in <module>()
1 try:
2 #train(.001,False,False) #train from scratch
----> 3 train(.001,True,True) #continue training from pretrained weights #epoch500
4 #train(.001) #train from previously saved weights
5 except KeyboardInterrupt:
ipython-input-35-39693d0edd0a> in train(learning_rate, continue_training, transfer)
31 learning_rate = tf.train.exponential_decay(learning_rate, global_step,
32 int(len(index)/batch_size), 0.95)
---> 33 train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
34 tf.global_variables_initializer().run()
35
/home/niraj/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/optimizer.pyc in minimize(self, loss, global_step, var_list, gate_gradients, aggregation_method, colocate_gradients_with_ops, name, grad_loss)
320 "No gradients provided for any variable, check your graph for ops"
321 " that do not support gradients, between variables %s
and loss %s." %
--> 322 ([str(v) for _, v in grads_and_vars], loss))
323
324 return self.apply_gradients(grads_and_vars,
global_step=global_step,
ValueError: No gradients provided for any variable, check your graph for ops that do not support gradients, between variables ["tf.Variable 'word_embedding:0' shape=(2943, 256) dtype=float32_ref>", "tf.Variable 'embedding_bias:0' shape=(256,) dtype=float32_ref>", "tf.Variable 'img_embedding:0' shape=(4096, 256) dtype=float32_ref>", "tf.Variable 'img_embedding_bias:0' shape=(256,) dtype=float32_ref>", "tf.Variable 'word_encoding:0' shape=(256, 2943) dtype=float32_ref>", "tf.Variable 'word_encoding_bias:0' shape=(2943,) dtype=float32_ref>"] and loss Tensor("RNN/div:0", shape=(), dtype=float32).
I know that the error is due to the fact that there is a variable which doesen't holds the gradient during optimisation which in turn is cutting the graph but I am unable to pick it out.
I am using already trained VGG-net 16 model parameters and the FLICKR-30 image dataset having corresponding annotations.
This is the code:
def get_data(annotation_path, feature_path):
annotations = pd.read_table(annotation_path, sep='\t', header=None, names=['image', 'caption'])
return np.load(feature_path,'r'), annotations['caption'].values
def preProBuildWordVocab(sentence_iterator, word_count_threshold=30): # function from Andre Karpathy's NeuralTalk
print('preprocessing %d word vocab' % (word_count_threshold, ))
word_counts = {}
nsents = 0
for sent in sentence_iterator:
nsents += 1
for w in sent.lower().split(' '):
word_counts[w] = word_counts.get(w, 0) + 1
vocab = [w for w in word_counts if word_counts[w] >= word_count_threshold]
print('preprocessed words %d -> %d' % (len(word_counts), len(vocab)))
ixtoword = {}
ixtoword[0] = '.'
wordtoix = {}
wordtoix['#START#'] = 0
ix = 1
for w in vocab:
wordtoix[w] = ix
ixtoword[ix] = w
ix += 1
word_counts['.'] = nsents
bias_init_vector = np.array([1.0*word_counts[ixtoword[i]] for i in ixtoword])
bias_init_vector /= np.sum(bias_init_vector)
bias_init_vector = np.log(bias_init_vector)
bias_init_vector -= np.max(bias_init_vector)
return wordtoix, ixtoword, bias_init_vector.astype(np.float32)
class Caption_Generator():
def __init__(self, dim_in, dim_embed, dim_hidden, batch_size, n_lstm_steps, n_words, init_b):
self.dim_in = dim_in
self.dim_embed = dim_embed
self.dim_hidden = dim_hidden
self.batch_size = batch_size
self.n_lstm_steps = n_lstm_steps
self.n_words = n_words
# declare the variables to be used for our word embeddings
with tf.device("/cpu:0"):
self.word_embedding = tf.Variable(tf.random_uniform([self.n_words, self.dim_embed], -0.1, 0.1), name='word_embedding')
self.embedding_bias = tf.Variable(tf.zeros([dim_embed]), name='embedding_bias')
# declare the LSTM itself
self.lstm = tf.contrib.rnn.BasicLSTMCell(dim_hidden)
# declare the variables to be used to embed the image feature embedding to the word embedding space
self.img_embedding = tf.Variable(tf.random_uniform([dim_in, dim_hidden], -0.1, 0.1), name='img_embedding')
self.img_embedding_bias = tf.Variable(tf.zeros([dim_hidden]), name='img_embedding_bias')
# declare the variables to go from an LSTM output to a word encoding output
self.word_encoding = tf.Variable(tf.random_uniform([dim_hidden, n_words], -0.1, 0.1), name='word_encoding')
# initialize this bias variable from the preProBuildWordVocab output
self.word_encoding_bias = tf.Variable(init_b, name='word_encoding_bias')
def build_model(self):
# declaring the placeholders for our extracted image feature vectors, our caption, and our mask
# (describes how long our caption is with an array of 0/1 values of length `maxlen`
img = tf.placeholder(tf.float32, [self.batch_size, self.dim_in])
caption_placeholder = tf.placeholder(tf.int32, [self.batch_size, self.n_lstm_steps])
mask = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps])
# getting an initial LSTM embedding from our image_imbedding
image_embedding = tf.matmul(img, self.img_embedding) + self.img_embedding_bias
# setting initial state of our LSTM
state = self.lstm.zero_state(self.batch_size, dtype=tf.float32)
total_loss = 0.0
with tf.variable_scope("RNN"):
for i in range(self.n_lstm_steps):
if i > 0:
#if this isn’t the first iteration of our LSTM we need to get the word_embedding corresponding
# to the (i-1)th word in our caption
with tf.device("/cpu:0"):
current_embedding = tf.nn.embedding_lookup(self.word_embedding, caption_placeholder[:,i-1]) + self.embedding_bias
else:
#if this is the first iteration of our LSTM we utilize the embedded image as our input
current_embedding = image_embedding
if i > 0:
# allows us to reuse the LSTM tensor variable on each iteration
tf.get_variable_scope().reuse_variables()
out, state = self.lstm(current_embedding, state)
#out, state = self.tf.nn.dynamic_rnn(current_embedding, state)
if i > 0:
#get the one-hot representation of the next word in our caption
labels = tf.expand_dims(caption_placeholder[:, i], 1)
ix_range=tf.range(0, self.batch_size, 1)
ixs = tf.expand_dims(ix_range, 1)
concat = tf.concat([ixs, labels],1)
onehot = tf.sparse_to_dense(
concat, tf.stack([self.batch_size, self.n_words]), 1.0, 0.0)
#perform a softmax classification to generate the next word in the caption
logit = tf.matmul(out, self.word_encoding) + self.word_encoding_bias
xentropy = tf.nn.softmax_cross_entropy_with_logits(logits=logit, labels=onehot)
xentropy = xentropy * mask[:,i]
loss = tf.reduce_sum(xentropy)
total_loss += loss
total_loss = total_loss / tf.reduce_sum(mask[:,1:])
return total_loss, img, caption_placeholder, mask
### Parameters ###
dim_embed = 256
dim_hidden = 256
dim_in = 4096
batch_size = 128
momentum = 0.9
n_epochs = 150
def train(learning_rate=0.001, continue_training=False, transfer=True):
tf.reset_default_graph()
feats, captions = get_data(annotation_path, feature_path)
wordtoix, ixtoword, init_b = preProBuildWordVocab(captions)
np.save('data/ixtoword', ixtoword)
index = (np.arange(len(feats)).astype(int))
np.random.shuffle(index)
sess = tf.InteractiveSession()
n_words = len(wordtoix)
maxlen = np.max( [x for x in map(lambda x: len(x.split(' ')), captions) ] )
caption_generator = Caption_Generator(dim_in, dim_hidden, dim_embed, batch_size, maxlen+2, n_words, init_b)
loss, image, sentence, mask = caption_generator.build_model()
saver = tf.train.Saver(max_to_keep=100)
global_step=tf.Variable(0,trainable=False)
learning_rate = tf.train.exponential_decay(learning_rate, global_step,
int(len(index)/batch_size), 0.95)
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
tf.global_variables_initializer().run()
if continue_training:
if not transfer:
saver.restore(sess,tf.train.latest_checkpoint(model_path))
else:
saver.restore(sess,tf.train.latest_checkpoint(model_path_transfer))
losses=[]
for epoch in range(n_epochs):
for start, end in zip( range(0, len(index), batch_size), range(batch_size, len(index), batch_size)):
current_feats = feats[index[start:end]]
current_captions = captions[index[start:end]]
current_caption_ind = [x for x in map(lambda cap: [wordtoix[word] for word in cap.lower().split(' ')[:-1] if word in wordtoix], current_captions)]
current_caption_matrix = sequence.pad_sequences(current_caption_ind, padding='post', maxlen=maxlen+1)
current_caption_matrix = np.hstack( [np.full( (len(current_caption_matrix),1), 0), current_caption_matrix] )
current_mask_matrix = np.zeros((current_caption_matrix.shape[0], current_caption_matrix.shape[1]))
nonzeros = np.array([x for x in map(lambda x: (x != 0).sum()+2, current_caption_matrix )])
for ind, row in enumerate(current_mask_matrix):
row[:nonzeros[ind]] = 1
_, loss_value = sess.run([train_op, loss], feed_dict={
image: current_feats.astype(np.float32),
sentence : current_caption_matrix.astype(np.int32),
mask : current_mask_matrix.astype(np.float32)
})
print("Current Cost: ", loss_value, "\t Epoch {}/{}".format(epoch, n_epochs), "\t Iter {}/{}".format(start,len(feats)))
print("Saving the model from epoch: ", epoch)
saver.save(sess, os.path.join(model_path, 'model'), global_step=epoch)
Branching in the loss building routine is invalid.
with tf.variable_scope("RNN"):
for i in range(self.n_lstm_steps):
if i > 0:
[...]
else:
[...]
if i > 0:
[...]
if i > 0:
[...]
Note, that last two ifs will never run, as they are in the else clause, meaning that i <= 0. Consequently your loss is actually a constant, equal 0, and thus TF do not see how to optimise it wrt. variables.
I am training the "Show and tell" model using tensorflow in which the model automatically generates the captions of the images. How ever I am getting this error.
This is the traceback:
TypeError Traceback (most recent call
last)
<ipython-input-14-b6da0a27b701> in <module>()
1 try:
2 #train(.001,False,False) #train from scratch
----> 3 train(.001,True,True) #continue training from pretrained weights #epoch500
4 #train(.001) #train from previously saved weights
5 except KeyboardInterrupt:
<ipython-input-13-39693d0edd0a> in train(learning_rate, continue_training, transfer)
23 n_words = len(wordtoix)
24 maxlen = np.max( [x for x in map(lambda x: len(x.split(' ')), captions) ] )
---> 25 caption_generator = Caption_Generator(dim_in, dim_hidden, dim_embed, batch_size, maxlen+2, n_words, init_b)
26
27 loss, image, sentence, mask = caption_generator.build_model()
<ipython-input-12-1b31c4175b3a> in __init__(self, dim_in, dim_embed, dim_hidden, batch_size, n_lstm_steps, n_words, init_b)
11 # declare the variables to be used for our word embeddings
12 with tf.device("/cpu:0"):
---> 13 self.word_embedding = tf.get_variable(tf.random_uniform([self.n_words, self.dim_embed], -0.1, 0.1), name='word_embedding')
14
15 self.embedding_bias = tf.get_variable(tf.zeros([dim_embed]), name='embedding_bias')
TypeError: get_variable() got multiple values for keyword argument 'name'
The problem might be that I am passing some extra arguments to the get_variable initializer but I unable to trace it where this problem is occurring.
Here is the code:
def get_data(annotation_path, feature_path):
annotations = pd.read_table(annotation_path, sep='\t', header=None, names=['image', 'caption'])
return np.load(feature_path,'r'), annotations['caption'].values
def preProBuildWordVocab(sentence_iterator, word_count_threshold=30): # function from Andre Karpathy's NeuralTalk
print('preprocessing %d word vocab' % (word_count_threshold, ))
word_counts = {}
nsents = 0
for sent in sentence_iterator:
nsents += 1
for w in sent.lower().split(' '):
word_counts[w] = word_counts.get(w, 0) + 1
vocab = [w for w in word_counts if word_counts[w] >= word_count_threshold]
print('preprocessed words %d -> %d' % (len(word_counts), len(vocab)))
ixtoword = {}
ixtoword[0] = '.'
wordtoix = {}
wordtoix['#START#'] = 0
ix = 1
for w in vocab:
wordtoix[w] = ix
ixtoword[ix] = w
ix += 1
word_counts['.'] = nsents
bias_init_vector = np.array([1.0*word_counts[ixtoword[i]] for i in ixtoword])
bias_init_vector /= np.sum(bias_init_vector)
bias_init_vector = np.log(bias_init_vector)
bias_init_vector -= np.max(bias_init_vector)
return wordtoix, ixtoword, bias_init_vector.astype(np.float32)
class Caption_Generator():
def __init__(self, dim_in, dim_embed, dim_hidden, batch_size, n_lstm_steps, n_words, init_b):
self.dim_in = dim_in
self.dim_embed = dim_embed
self.dim_hidden = dim_hidden
self.batch_size = batch_size
self.n_lstm_steps = n_lstm_steps
self.n_words = n_words
# declare the variables to be used for our word embeddings
with tf.device("/cpu:0"):
self.word_embedding = tf.get_variable(tf.random_uniform([self.n_words, self.dim_embed], -0.1, 0.1), name='word_embedding')
self.embedding_bias = tf.get_variable(tf.zeros([dim_embed]), name='embedding_bias')
# declare the LSTM itself
self.lstm = tf.contrib.rnn.BasicLSTMCell(dim_hidden)
# declare the variables to be used to embed the image feature embedding to the word embedding space
self.img_embedding = tf.get_variable(tf.random_uniform([dim_in, dim_hidden], -0.1, 0.1), name='img_embedding')
self.img_embedding_bias = tf.get_variable(tf.zeros([dim_hidden]), name='img_embedding_bias')
# declare the variables to go from an LSTM output to a word encoding output
self.word_encoding = tf.get_variable(tf.random_uniform([dim_hidden, n_words], -0.1, 0.1), name='word_encoding')
# initialize this bias variable from the preProBuildWordVocab output
self.word_encoding_bias = tf.get_variable(init_b, name='word_encoding_bias')
def build_model(self):
# declaring the placeholders for our extracted image feature vectors, our caption, and our mask
# (describes how long our caption is with an array of 0/1 values of length `maxlen`
img = tf.placeholder(tf.float32, [self.batch_size, self.dim_in])
caption_placeholder = tf.placeholder(tf.int32, [self.batch_size, self.n_lstm_steps])
mask = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps])
# getting an initial LSTM embedding from our image_imbedding
image_embedding = tf.matmul(img, self.img_embedding) + self.img_embedding_bias
# setting initial state of our LSTM
state = self.lstm.zero_state(self.batch_size, dtype=tf.float32)
total_loss = 0.0
with tf.variable_scope("RNN"):
for i in range(self.n_lstm_steps):
if i > 0:
#if this isn’t the first iteration of our LSTM we need to get the word_embedding corresponding
# to the (i-1)th word in our caption
with tf.device("/cpu:0"):
current_embedding = tf.nn.embedding_lookup(self.word_embedding, caption_placeholder[:,i-1]) + self.embedding_bias
else:
#if this is the first iteration of our LSTM we utilize the embedded image as our input
current_embedding = image_embedding
if i > 0:
# allows us to reuse the LSTM tensor variable on each iteration
tf.get_variable_scope().reuse_variables()
out, state = self.lstm(current_embedding, state)
#out, state = self.tf.nn.dynamic_rnn(current_embedding, state)
if i > 0:
#get the one-hot representation of the next word in our caption
labels = tf.expand_dims(caption_placeholder[:, i], 1)
ix_range=tf.range(0, self.batch_size, 1)
ixs = tf.expand_dims(ix_range, 1)
concat = tf.concat([ixs, labels],1)
onehot = tf.sparse_to_dense(
concat, tf.stack([self.batch_size, self.n_words]), 1.0, 0.0)
#perform a softmax classification to generate the next word in the caption
logit = tf.matmul(out, self.word_encoding) + self.word_encoding_bias
xentropy = tf.nn.softmax_cross_entropy_with_logits(logits=logit, labels=onehot)
xentropy = xentropy * mask[:,i]
loss = tf.reduce_sum(xentropy)
total_loss += loss
total_loss = total_loss / tf.reduce_sum(mask[:,1:])
return total_loss, img, caption_placeholder, mask
### Parameters ###
dim_embed = 256
dim_hidden = 256
dim_in = 4096
batch_size = 128
momentum = 0.9
n_epochs = 150
def train(learning_rate=0.001, continue_training=False, transfer=True):
tf.reset_default_graph()
feats, captions = get_data(annotation_path, feature_path)
wordtoix, ixtoword, init_b = preProBuildWordVocab(captions)
np.save('data/ixtoword', ixtoword)
index = (np.arange(len(feats)).astype(int))
np.random.shuffle(index)
sess = tf.InteractiveSession()
n_words = len(wordtoix)
maxlen = np.max( [x for x in map(lambda x: len(x.split(' ')), captions) ] )
caption_generator = Caption_Generator(dim_in, dim_hidden, dim_embed, batch_size, maxlen+2, n_words, init_b)
loss, image, sentence, mask = caption_generator.build_model()
saver = tf.train.Saver(max_to_keep=100)
global_step=tf.Variable(0,trainable=False)
learning_rate = tf.train.exponential_decay(learning_rate, global_step,
int(len(index)/batch_size), 0.95)
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
tf.global_variables_initializer().run()
if continue_training:
if not transfer:
saver.restore(sess,tf.train.latest_checkpoint(model_path))
else:
saver.restore(sess,tf.train.latest_checkpoint(model_path_transfer))
losses=[]
for epoch in range(n_epochs):
for start, end in zip( range(0, len(index), batch_size), range(batch_size, len(index), batch_size)):
current_feats = feats[index[start:end]]
current_captions = captions[index[start:end]]
current_caption_ind = [x for x in map(lambda cap: [wordtoix[word] for word in cap.lower().split(' ')[:-1] if word in wordtoix], current_captions)]
current_caption_matrix = sequence.pad_sequences(current_caption_ind, padding='post', maxlen=maxlen+1)
current_caption_matrix = np.hstack( [np.full( (len(current_caption_matrix),1), 0), current_caption_matrix] )
current_mask_matrix = np.zeros((current_caption_matrix.shape[0], current_caption_matrix.shape[1]))
nonzeros = np.array([x for x in map(lambda x: (x != 0).sum()+2, current_caption_matrix )])
for ind, row in enumerate(current_mask_matrix):
row[:nonzeros[ind]] = 1
_, loss_value = sess.run([train_op, loss], feed_dict={
image: current_feats.astype(np.float32),
sentence : current_caption_matrix.astype(np.int32),
mask : current_mask_matrix.astype(np.float32)
})
print("Current Cost: ", loss_value, "\t Epoch {}/{}".format(epoch, n_epochs), "\t Iter {}/{}".format(start,len(feats)))
print("Saving the model from epoch: ", epoch)
saver.save(sess, os.path.join(model_path, 'model'), global_step=epoch)
try:
#train(.001,False,False) #train from scratch
train(.001,True,True) #continue training from pretrained weights #epoch500
#train(.001) #train from previously saved weights
except KeyboardInterrupt:
print('Exiting Training')
In your constructor, try
self.word_embedding = tf.get_variable("word_embedding", initializer=tf.random_uniform([self.n_words, self.dim_embed], -0.1, 0.1))
The thing is, the first position argument is name and you have the initializer there instead, and then you again define the name, hence the error.
You need to make likewise changes everywhere you use tf.get_variable
Environment info
Operating System: Windows 7 64-bit
Tensorflow installed from pre-built pip (no CUDA): 1.0.1
Python 3.5.2 64-bit
Problem
I have problems with restoring my net (RNN character base language model). Below is a simplified version with the same problem.
When I run it the first time, I get, for example, this.
...
step 160: loss = 1.956 (perplexity = 7.069016620211226)
step 180: loss = 1.837 (perplexity = 6.274748642468816)
step 200: loss = 1.825 (perplexity = 6.202084762557817)
But on the second run, after restoring parameters, I get this.
step 220: loss = 2.346 (perplexity = 10.446611983898903)
step 240: loss = 2.346 (perplexity = 10.446709120339545)
...
All the tf variables seem to be correctly restored, including the state, which will be fed to RNN.
Data position is also restored (from 'step').
I also made a similar program for MNIST recognition model, and this one works fine: the losses before and after the restoring are continuous.
Are there any other parameters or states that should be saved and restored?
import argparse
import os
import tensorflow as tf
import numpy as np
import math
B = 20 # batch size
H = 200 # size of hidden layer of neurons
T = 25 # number of time steps to unroll the RNN for
data_file = 'ptb.train.txt' # any plain text file will do
checkpoint_dir = "tmp"
#----------------
# prepare data
#----------------
data = open(data_file, 'r').read()
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print('data has {0} characters, {1} unique.'.format(data_size, vocab_size))
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }
input_index_raw = np.array([char_to_ix[ch] for ch in data])
input_index_raw = input_index_raw[0:len(input_index_raw) // T * T]
input_index_raw_shift = np.append(input_index_raw[1:], input_index_raw[0])
input_all = input_index_raw.reshape([-1, T])
target_all = input_index_raw_shift.reshape([-1, T])
num_packed_data = len(input_all)
#----------------
# build model
#----------------
class Model(object):
def __init__(self):
self.input_ph = tf.placeholder(tf.int32, [None, T], name="input_ph")
self.target_ph = tf.placeholder(tf.int32, [None, T], name="target_ph")
embedding = tf.get_variable("embedding", [vocab_size, H], initializer=tf.random_normal_initializer(), dtype=tf.float32)
# input_ph is B x T.
# input_embedded is B x T x H.
input_embedded = tf.nn.embedding_lookup(embedding, self.input_ph)
cell = tf.contrib.rnn.BasicRNNCell(H)
self.state_ph = tf.placeholder(tf.float32, (None, cell.state_size), name="state_ph")
# Make state variable so that it will be saved by the saver.
self.state = tf.get_variable("state", (B, cell.state_size), initializer=tf.zeros_initializer(), trainable=False, dtype=tf.float32)
# Construct initial_state according to whether restoring or not.
self.isRestore = tf.placeholder(tf.bool, shape=(), name="isRestore")
zero_state = cell.zero_state(B, dtype=tf.float32)
self.initial_state = tf.cond(self.isRestore, lambda: self.state, lambda: zero_state)
# input_embedded : B x T x H
# output: B x T x H
# state : B x cell.state_size
output, state_ = tf.nn.dynamic_rnn(cell, input_embedded, initial_state=self.state_ph)
self.final_state = tf.assign(self.state, state_)
# reshape to (B * T) x H.
output_flat = tf.reshape(output, [-1, H])
# Convert hidden layer's output to vector of logits for each vocabulary.
softmax_w = tf.get_variable("softmax_w", [H, vocab_size], dtype=tf.float32)
softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=tf.float32)
logits = tf.matmul(output_flat, softmax_w) + softmax_b
# cross_entropy is a vector of length B * T
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.reshape(self.target_ph, [-1]), logits=logits)
self.loss = tf.reduce_mean(cross_entropy)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
self.global_step = tf.get_variable("global_step", (), initializer=tf.zeros_initializer(), trainable=False, dtype=tf.int32)
self.training_op = optimizer.minimize(cross_entropy, global_step=self.global_step)
def train_batch(self, sess, input_batch, target_batch, initial_state):
final_state_, _, final_loss = sess.run([self.final_state, self.training_op, self.loss], feed_dict={self.input_ph: input_batch, self.target_ph: target_batch, self.state_ph: initial_state})
return final_state_, final_loss
# main
with tf.Session() as sess:
if not tf.gfile.Exists(checkpoint_dir):
tf.gfile.MakeDirs(checkpoint_dir)
batch_stride = num_packed_data // B
# make model
model = Model()
saver = tf.train.Saver()
# always initialize
init = tf.global_variables_initializer()
init.run()
# restore if necessary
isRestore = False
ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
if ckpt:
isRestore = True
last_model = ckpt.model_checkpoint_path
print("Loading " + last_model)
saver.restore(sess, last_model)
# set initial step
step = tf.train.global_step(sess, model.global_step) + 1
print("start step = {0}".format(step))
# fetch initial state
state = sess.run(model.initial_state, feed_dict={model.isRestore: isRestore})
print("Initial state: {0}".format(state))
while True:
# prepare batch data
idx = [(step + x * batch_stride) % num_packed_data for x in range(0, B)]
input_batch = input_all[idx]
target_batch = target_all[idx]
state, last_loss = model.train_batch(sess, input_batch, target_batch, state)
if step % 20 == 0:
print('step {0}: loss = {1:.3f} (perplexity = {2})'.format(step, last_loss, math.exp(last_loss)))
if step % 200 == 0:
saved_file = saver.save(sess, os.path.join(checkpoint_dir, "model.ckpt"), global_step=step)
print("Saved to " + saved_file)
print("Last state: {0}".format(model.state.eval()))
break;
step = step + 1
The problem is solved. It had nothing to do with RNN nor TensorFlow.
I changed
chars = list(set(data))
to
chars = sorted(set(data))
and now it works.
This is because python uses a random hash function to build the set, and every time python restarted, 'chars' had a different ordering.
I want to implement a Siamese MLP network using mnist dataset.
I built my code based on Keras mnist_siamese_graph, but error value and accuracy are very huge compare to Keras version.
I cannot figure out where are problems.
This is my code:
import random
import numpy as np
import time
import tensorflow as tf
import input_data
mnist = input_data.read_data_sets("/tmp/data",one_hot=False)
import pdb
def create_pairs(x, digit_indices):
'''Positive and negative pair creation.
Alternates between positive and negative pairs.
'''
pairs = []
labels = []
n = min([len(digit_indices[d]) for d in range(10)]) - 1
for d in range(10):
for i in range(n):
z1, z2 = digit_indices[d][i], digit_indices[d][i+1]
pairs += [[x[z1], x[z2]]]
inc = random.randrange(1, 10)
dn = (d + inc) % 10
z1, z2 = digit_indices[d][i], digit_indices[dn][i]
pairs += [[x[z1], x[z2]]]
labels += [1, 0]
return np.array(pairs), np.array(labels)
def mlp(input_,input_dim,output_dim,name="mlp"):
with tf.variable_scope(name):
w = tf.get_variable('w',[input_dim,output_dim],tf.float32,tf.random_normal_initializer())
return tf.nn.relu(tf.matmul(input_,w))
def build_model_mlp(X_,_dropout):
model = mlpnet(X_,_dropout)
return model
def mlpnet(image,_dropout):
l1 = mlp(image,784,128,name='l1')
l1 = tf.nn.dropout(l1,_dropout)
l2 = mlp(l1,128,128,name='l2')
l2 = tf.nn.dropout(l2,_dropout)
l3 = mlp(l2,128,128,name='l3')
return l3
def contrastive_loss(y,d):
tmp= y *tf.square(d)
#tmp= tf.mul(y,tf.square(d))
tmp2 = (1-y) *tf.square(tf.maximum((1 - d),0))
return tf.reduce_sum(tmp +tmp2)/batch_size/2
def compute_accuracy(prediction,labels):
return labels[prediction.ravel() < 0.5].mean()
#return tf.reduce_mean(labels[prediction.ravel() < 0.5])
def next_batch(s,e,inputs,labels):
input1 = inputs[s:e,0]
input2 = inputs[s:e,1]
y= np.reshape(labels[s:e],(len(range(s,e)),1))
return input1,input2,y
# Initializing the variables
init = tf.initialize_all_variables()
# the data, shuffled and split between train and test sets
X_train = mnist.train._images
y_train = mnist.train._labels
X_test = mnist.validation._images
y_test = mnist.validation._labels
batch_size =128
# create training+test positive and negative pairs
digit_indices = [np.where(y_train == i)[0] for i in range(10)]
tr_pairs, tr_y = create_pairs(X_train, digit_indices)
digit_indices = [np.where(y_test == i)[0] for i in range(10)]
te_pairs, te_y = create_pairs(X_test, digit_indices)
images_L = tf.placeholder(tf.float32,shape=([None,784]),name='L')
images_R = tf.placeholder(tf.float32,shape=([None,784]),name='R')
labels = tf.placeholder(tf.float32,shape=([None,1]),name='gt')
dropout_f = tf.placeholder("float")
with tf.variable_scope("siamese") as scope:
model1= build_model_mlp(images_L,dropout_f)
scope.reuse_variables()
model2 = build_model_mlp(images_R,dropout_f)
distance = tf.sqrt(tf.reduce_sum(tf.pow(tf.sub(model1,model2),2),1,keep_dims=True))
loss = contrastive_loss(labels,distance)
#contrastice loss
t_vars = tf.trainable_variables()
d_vars = [var for var in t_vars if 'l' in var.name]
batch = tf.Variable(0)
optimizer = tf.train.RMSPropOptimizer(0.001,momentum=0.9,epsilon=1e-6).minimize(loss)
# Launch the graph
with tf.Session() as sess:
#sess.run(init)
tf.initialize_all_variables().run()
# Training cycle
for epoch in range(40):
print('epoch %d' % epoch)
avg_loss = 0.
avg_acc = 0.
total_batch = int(X_train.shape[0]/batch_size)
start_time = time.time()
# Loop over all batches
for i in range(total_batch):
s = i * batch_size
e = (i+1) *batch_size
# Fit training using batch data
input1,input2,y =next_batch(s,e,tr_pairs,tr_y)
_,loss_value,predict=sess.run([optimizer,loss,distance], feed_dict={images_L:input1,images_R:input2 ,labels:y,dropout_f:0.9})
tr_acc = compute_accuracy(predict,y)
avg_loss += loss_value
avg_acc +=tr_acc*100
#print('epoch %d loss %0.2f' %(epoch,avg_loss/total_batch))
duration = time.time() - start_time
print('epoch %d time: %f loss %0.2f acc %0.2f' %(epoch,duration,avg_loss/(total_batch),avg_acc/total_batch))
y = np.reshape(tr_y,(tr_y.shape[0],1))
predict=distance.eval(feed_dict={images_L:tr_pairs[:,0],images_R:tr_pairs[:,1],labels:y,dropout_f:1.0})
tr_acc = compute_accuracy(predict,y)
print('Accuract training set %0.2f' % (100 * tr_acc))