I'm trying to build an RNN using LSTM.
I made an LSTM model, followed by two DNN layers and one regression output layer.
I trained it on my data, and the final training loss came down to about 0.009.
However, when I applied the model to the test data, the loss was about 0.5.
The first-epoch training loss was also about 0.5, so I think the trained variables are not being used in the test model.
The only difference between the training and test models is the batch size:
training batch size = 100~200, test batch size = 1.
In the main function I create the LSTM instances; the model is built in the LSTM initializer.
def __init__(self, config, train_model=None):
    self.sess = sess = tf.Session()

    self.num_steps = num_steps = config.num_steps
    self.lstm_size = lstm_size = config.lstm_size
    self.num_features = num_features = config.num_features
    self.num_layers = num_layers = config.num_layers
    self.num_hiddens = num_hiddens = config.num_hiddens
    self.batch_size = batch_size = config.batch_size
    self.train = train = config.train
    self.epoch = config.epoch
    self.learning_rate = learning_rate = config.learning_rate

    with tf.variable_scope('model') as scope:
        self.lstm_cell = lstm_cell = tf.nn.rnn_cell.LSTMCell(lstm_size, initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        self.cell = cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * num_layers)

    with tf.name_scope('placeholders'):
        self.x = tf.placeholder(tf.float32, [self.batch_size, num_steps, num_features], name='input-x')
        self.y = tf.placeholder(tf.float32, [self.batch_size, num_features], name='input-y')
        self.init_state = cell.zero_state(self.batch_size, tf.float32)

    with tf.variable_scope('model'):
        self.W1 = tf.Variable(tf.truncated_normal([lstm_size*num_steps, num_hiddens], stddev=0.1), name='W1')
        self.b1 = tf.Variable(tf.truncated_normal([num_hiddens], stddev=0.1), name='b1')
        self.W2 = tf.Variable(tf.truncated_normal([num_hiddens, num_hiddens], stddev=0.1), name='W2')
        self.b2 = tf.Variable(tf.truncated_normal([num_hiddens], stddev=0.1), name='b2')
        self.W3 = tf.Variable(tf.truncated_normal([num_hiddens, num_features], stddev=0.1), name='W3')
        self.b3 = tf.Variable(tf.truncated_normal([num_features], stddev=0.1), name='b3')

    self.output, self.loss = self.inference()

    tf.initialize_all_variables().run(session=sess)
    tf.initialize_variables([self.b2]).run(session=sess)

    if train_model == None:
        self.train_step = tf.train.GradientDescentOptimizer(self.learning_rate).minimize(self.loss)
Using the LSTM __init__ above, the two LSTM instances below are created:
with tf.variable_scope("model",reuse=None):
train_model = LSTM(main_config)
with tf.variable_scope("model", reuse=True):
predict_model = LSTM(predict_config)
After creating the two LSTM instances, I trained train_model and then fed the test set into predict_model.
Why are the variables not reused?
The problem is that you should be using tf.get_variable() to create your variables instead of tf.Variable() when you are reusing a scope.
Take a look at this tutorial for sharing variables; you'll understand it better.
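As a quick illustration of the sharing mechanism (a minimal sketch, not tied to your model): tf.get_variable() looks a variable up by name inside the current variable scope, so a second call under the same scope with reuse=True returns the existing variable instead of creating a new one.

import tensorflow as tf

with tf.variable_scope("model"):
    v1 = tf.get_variable("W1", shape=[2, 2])   # created on the first call

with tf.variable_scope("model", reuse=True):
    v2 = tf.get_variable("W1", shape=[2, 2])   # looked up, not re-created

print(v1 is v2)   # True -- both names refer to the same underlying variable

tf.Variable(), by contrast, always creates a fresh variable, which is why your predict_model ends up with untrained copies of the weights.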
Also, you don't need to use a session here: you don't have to initialize your variables while defining the model; they should be initialized when you are about to train it.
The code to reuse the variables is the following:
def __init__(self, config, train_model=None):
    self.num_steps = num_steps = config.num_steps
    self.lstm_size = lstm_size = config.lstm_size
    self.num_features = num_features = config.num_features
    self.num_layers = num_layers = config.num_layers
    self.num_hiddens = num_hiddens = config.num_hiddens
    self.batch_size = batch_size = config.batch_size
    self.train = train = config.train
    self.epoch = config.epoch
    self.learning_rate = learning_rate = config.learning_rate

    with tf.variable_scope('model') as scope:
        self.lstm_cell = lstm_cell = tf.nn.rnn_cell.LSTMCell(lstm_size, initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        self.cell = cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * num_layers)

    with tf.name_scope('placeholders'):
        self.x = tf.placeholder(tf.float32, [self.batch_size, num_steps, num_features], name='input-x')
        self.y = tf.placeholder(tf.float32, [self.batch_size, num_features], name='input-y')
        self.init_state = cell.zero_state(self.batch_size, tf.float32)

    with tf.variable_scope('model'):
        self.W1 = tf.get_variable(initializer=tf.truncated_normal([lstm_size*num_steps, num_hiddens], stddev=0.1), name='W1')
        self.b1 = tf.get_variable(initializer=tf.truncated_normal([num_hiddens], stddev=0.1), name='b1')
        self.W2 = tf.get_variable(initializer=tf.truncated_normal([num_hiddens, num_hiddens], stddev=0.1), name='W2')
        self.b2 = tf.get_variable(initializer=tf.truncated_normal([num_hiddens], stddev=0.1), name='b2')
        self.W3 = tf.get_variable(initializer=tf.truncated_normal([num_hiddens, num_features], stddev=0.1), name='W3')
        self.b3 = tf.get_variable(initializer=tf.truncated_normal([num_features], stddev=0.1), name='b3')

    self.output, self.loss = self.inference()

    if train_model == None:
        self.train_step = tf.train.GradientDescentOptimizer(self.learning_rate).minimize(self.loss)
To see which variables are created after you create train_model and predict_model, use the following code:

for v in tf.all_variables():
    print(v.name)
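If the scope is being reused correctly, each shared weight should appear exactly once in that list, with names roughly like the following (illustrative, based on the nested 'model' scopes above):

model/model/W1:0
model/model/b1:0
...

If instead each weight shows up a second time under a uniquified name, a second copy was created and the two models are not sharing parameters.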
Related
I have the following base network with some important parameters (the error comes from these; please assume every other parameter is defined):
maxlen = 250
model_dense = 256
Base model:
def build_base_model(inputs):
    inputs = layers.Input(shape=(maxlen,), name='base_input')
    embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
    x = embedding_layer(inputs)
    transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim, trans_drop1, trans_drop2, trans_reg1, trans_reg2)
    x = transformer_block(x)
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dropout(model_drop1)(x)
    outputs = layers.Dense(model_dense)(x)
    base_model = keras.Model(inputs=inputs, outputs=outputs)
    return base_model
and my Siamese network is:
base_model = build_base_model()

input_text1 = layers.Input(shape=(maxlen,))
input_text2 = layers.Input(shape=(maxlen,))

emb1 = base_model(input_text1)
emb2 = base_model(input_text2)

distance = layers.Lambda(euclidean_distance)([emb1, emb2])
outputs = layers.Dense(1, activation="sigmoid")(distance)

model = keras.Model(inputs=[emb1, emb2], outputs=outputs)
model.compile(optimizer="adam", metrics=["accuracy"], loss='binary_crossentropy')

history = model.fit(
    train_X, train_y, batch_size=batch_size, epochs=50,
    validation_split=0.15, callbacks=callbacks, verbose=1,
)
It gives me the following error:
ValueError: Input 0 of layer "model_11" is incompatible with the layer: expected shape=(None, 256), found shape=(None, 250)
What am I doing wrong?
Base Transformer model tutorial taken from this
Siamese Model Structure, cosine distance, make_pairs from this
UPDATE: I have built the new network in a different manner and it is up and running. Can someone please confirm whether it is correct:
inputs1 = layers.Input(shape=(maxlen,),name='inp_1')
inputs2 = layers.Input(shape=(maxlen,),name='inp_2')
embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim, trans_drop1, trans_drop2, trans_reg1, trans_reg2)
pooling = layers.GlobalAveragePooling1D()
drop_layer = layers.Dropout(model_drop1)
out_dense = layers.Dense(model_dense)
x1 = embedding_layer(inputs1)
x2 = embedding_layer(inputs2)
x1 = transformer_block(x1)
x2 = transformer_block(x2)
x1 = pooling(x1)
x2 = pooling(x2)
x1 = drop_layer(x1)
x2 = drop_layer(x2)
vec_x1 = out_dense(x1)
vec_x2 = out_dense(x2)
distance = layers.Lambda(euclidean_distance)([vec_x1, vec_x2])
outputs = layers.Dense(1, activation="sigmoid")(distance)
model = keras.Model(inputs=[inputs1, inputs2], outputs=outputs)
In the line model = keras.Model(inputs=[emb1, emb2], outputs=outputs):
I suspect that you meant to say model = keras.Model(inputs=[input_text1, input_text2], outputs=outputs)
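For reference, a minimal sketch of the corrected wiring (everything else from the original snippet left unchanged): the Model inputs must be the Input tensors, not the embeddings produced by base_model.

emb1 = base_model(input_text1)
emb2 = base_model(input_text2)

distance = layers.Lambda(euclidean_distance)([emb1, emb2])
outputs = layers.Dense(1, activation="sigmoid")(distance)

# inputs are the (maxlen,)-shaped Input tensors, not the 256-dimensional embeddings
model = keras.Model(inputs=[input_text1, input_text2], outputs=outputs)

That also explains the error message: emb1/emb2 have shape (None, 256), so a model built on top of them expects 256-wide inputs, while the training data has length maxlen = 250.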
I'm trying to train a GAN using tensorflow, but during graph construction I'm getting this error:
ValueError: Input 0 of layer conv1_1 is incompatible with the layer: its rank is undefined, but the layer requires a defined rank.
I'm guessing this is because my discriminator and generator are defined inside separate functions that I call. I would ideally like to avoid unrolling my entire architecture out of those methods, as that would lead to a tediously long Python file.
I've tried giving the placeholders a default value before the first training examples are pushed through a sess.run() call, but this led to the same example being run through the graph at every stage of training (probably because that's how TensorFlow constructed the graph).
My training loop code is below. Please let me know if seeing the generator and discriminator functions themselves would help.
img1 = tf.placeholder(dtype=tf.float32)
img2 = tf.placeholder(dtype=tf.float32)
lr = tf.placeholder(dtype=tf.float32)

synthetic_imgs, synthetic_logits, mse, _ = self.gen(img1)

fake_result, fake_logits_hr, fake_feature_2 = self.disc(img1, synthetic_imgs)
ground_truth_result, ground_truth_logits_hr, truth_feature_2 = self.disc(img1, img2)
_, fake_logits_lr = self.disc_two(img1, synthetic_imgs)
_, ground_truth_logits_lr = self.disc_two(img1, img2)

a = tf.nn.sigmoid_cross_entropy_with_logits
dis_labels = tf.random.uniform((self.batch_size, 1), minval=-0.2, maxval=0.3)
gen_labels = tf.random.uniform((self.batch_size, 1), minval=0.75, maxval=1.2)
dis_loss = #Discriminator Loss
gen_loss = #Generator Loss
#May want to change to -log(MSE)

d_vars = [var for var in tf.trainable_variables() if 'disc' in var.name]
g_vars = [var for var in tf.trainable_variables() if 'g_' in var.name]

dis_loss = tf.reduce_mean(dis_loss)
gen_loss = tf.reduce_mean(gen_loss)

with tf.variable_scope('optimizers', reuse=tf.AUTO_REUSE) as scope:
    gen_opt = tf.train.AdamOptimizer(learning_rate=lr, name='gen_opt')
    disc_opt = tf.train.AdamOptimizer(learning_rate=lr, name='dis_opt')
    gen1 = gen_opt.minimize(gen_loss, var_list=g_vars)
    disc1 = disc_opt.minimize(dis_loss, var_list=d_vars)

    #Tensorboard code for visualizing gradients
    #global_step = variable_scope.get_variable("global_step", [], trainable=False, dtype=dtypes.int64, initializer=init_ops.constant_initializer(0, dtype=dtypes.int64))
    # gen_training = tf.contrib.layers.optimize_loss(gen_loss, global_step, learning_rate=lr, optimizer=gen_opt, summaries=["gradients"], variables=g_vars)
    # disc_training = tf.contrib.layers.optimize_loss(dis_loss, global_step, learning_rate=lr, optimizer=disc_opt, summaries=["gradients"], variables=d_vars)

#summary = tf.summary.merge_all()
with tf.Session() as sess:
    print('start session')
    sess.run(tf.global_variables_initializer())
    #print(tf.trainable_variables()) #Find variable corresponding to conv filter weights, which you can use for tensorboard visualization

    #Code to load each training example
    gen = self.pairs()
    for i in range(self.num_epochs):
        print(str(i + 1) + 'th epoch')
        for j in range(self.num_batches):
            i_1 = None
            i_2 = None
            #Creates batch
            for k in range(self.batch_size):
                p = next(gen)
                try:
                    i_1 = np.concatenate((i_1, self.load_img(p[0])), axis=0)
                    i_2 = np.concatenate((i_2, self.load_img(p[1])), axis=0)
                except Exception:
                    i_1 = self.load_img(p[0])
                    i_2 = self.load_img(p[1])

            l_r = 8e-4 * (0.5)**(i//100) #Play around with this value
            test, gLoss, _ = sess.run([img1, gen_loss, gen1], feed_dict={img1: i_1, img2: i_2, lr: l_r})
            dLoss, _ = sess.run([dis_loss, disc1], feed_dict={img1: i_1, img2: i_2, lr: l_r})
            print(test.shape)
            cv2.imwrite('./saved_imgs/gan_test' + str(j) + '.png', np.squeeze(test, axis=3)[0])

            #Code to display gradients and other relevant stats on tensorboard
            #Will be under histogram tab, labelled OptimizeLoss
            # if j % 500 == 0:
            #     writer = tf.summary.FileWriter(sess.graph, logdir='./tensorboard/1') #Change logdir for each run
            #     summary_str = sess.run(summary, feed_dict={img1: i_1, img2: i_2, lr: l_r})
            #     writer.add_summary(summary_str, str(i)+': '+str(j)) #Can change to one epoch only if necessary
            #     writer.flush()

            if j % 12 == 0: #Prints loss statistics every 12th batch
                #print('Epoch: '+str(i))
                print('Generator Loss: ' + str(gLoss))
                print('Discriminator Loss: ' + str(dLoss))
        self.save_model(sess, i)
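A hedged note on the error itself, based only on the snippet above: img1, img2 and lr are created without a shape argument, so their rank is undefined, which is exactly what the conv1_1 layer complains about. One possible fix is to give the image placeholders a defined rank (a sketch; the channel count of 1 is only inferred from the np.squeeze(test, axis=3) call and may not match your data):

# Rank-4 image placeholders; individual dimensions may stay unknown (None)
img1 = tf.placeholder(dtype=tf.float32, shape=[None, None, None, 1])
img2 = tf.placeholder(dtype=tf.float32, shape=[None, None, None, 1])
lr = tf.placeholder(dtype=tf.float32, shape=[])

This keeps the generator and discriminator inside their own functions; only the placeholder definitions change.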
I am currently going through Google's Machine Learning Crash Course and I am experimenting with the DNNClassifier estimator for a binary classification problem. I am trying to add regularisation (L1/L2) to the Adam optimiser, since regularisation is not defined as an argument there. Any ideas how to implement it? Below is my code:
steps = 1000
periods = 10
steps_per_period = steps / periods

my_optimiser = tf.train.AdamOptimizer(learning_rate=learning_rate)
my_optimiser = tf.contrib.estimator.clip_gradients_by_norm(my_optimiser, 5.0)

dnn_classifier = tf.estimator.DNNClassifier(
    feature_columns=construct_feature_columns(training_features),
    n_classes=2,
    hidden_units=hidden_units,
    optimizer=my_optimiser)

training_input_fn = lambda: my_input_fn(
    training_features,
    training_targets,
    batch_size=batch_size)

predict_training_input_fn = lambda: my_input_fn(
    training_features,
    training_targets,
    num_epochs=1,
    shuffle=False)

predict_validation_input_fn = lambda: my_input_fn(
    validation_features,
    validation_targets,
    num_epochs=1,
    shuffle=False)

training_log_losses = []
validation_log_losses = []

for period in range(0, periods):
    dnn_classifier.train(
        input_fn=training_input_fn,
        steps=steps_per_period
    )
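For what it's worth, tf.train.AdamOptimizer itself exposes no regularisation arguments. A hedged sketch of one workaround (note that this swaps Adam for a proximal optimizer that does accept L1/L2 strengths; the strength values are assumptions to tune):

# Sketch: an optimizer with built-in L1/L2 regularisation, in place of Adam
my_optimiser = tf.train.ProximalAdagradOptimizer(
    learning_rate=learning_rate,
    l1_regularization_strength=0.001,   # assumed value -- tune for your data
    l2_regularization_strength=0.001)   # assumed value -- tune for your data
my_optimiser = tf.contrib.estimator.clip_gradients_by_norm(my_optimiser, 5.0)

dnn_classifier = tf.estimator.DNNClassifier(
    feature_columns=construct_feature_columns(training_features),
    n_classes=2,
    hidden_units=hidden_units,
    optimizer=my_optimiser)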
I created a simple image classification network using TensorFlow and trained it successfully. But while testing the model with the same input image, I am getting different prediction results. Details are given below.
Only two classes (namely dog and cat) are present in the dataset. I converted the dataset to TFRecord before using it. The network architecture is shown below.
def conv_layer(input, channels_in, fileter_size, channels_out, name="conv"):
    with tf.name_scope(name):
        w = tf.Variable(tf.truncated_normal([fileter_size, fileter_size, channels_in, channels_out], stddev=0.1), name=name + "/W")
        b = tf.Variable(tf.constant(0.1, shape=[channels_out]), name=name + "/B")
        conv = tf.nn.conv2d(input, w, strides=[1, 1, 1, 1], padding="SAME")
        ret = tf.nn.max_pool((conv + b), ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
        return ret

def fc_layer(input, channels_in, channels_out, name="fc"):
    with tf.name_scope(name):
        w = tf.Variable(tf.truncated_normal([channels_in, channels_out], stddev=0.1), name=name + "/W")
        b = tf.Variable(tf.constant(0.1, shape=[channels_out]), name=name + "/B")
        mul = tf.matmul(input, w)
        ret = tf.add(mul, b, name="logits")
        return ret

def inference(image_batch):
    fc1_size = 128
    num_classes = 2

    conv1 = conv_layer(image_batch, 3, 3, 32, "conv1")
    conv2 = conv_layer(conv1, 32, 3, 32, "conv2")
    conv3 = conv_layer(conv2, 32, 3, 64, "conv3")

    shape = conv3.get_shape()
    #shape = tf.shape(conv3)
    flat = tf.reshape(conv3, shape=[-1, shape[1:4].num_elements()])

    fc1 = fc_layer(flat, shape[1:4].num_elements(), fc1_size, "fc1")
    logits = fc_layer(fc1, fc1_size, num_classes, "fc2")
    return logits
Training of the model is done by the following code:
label, image = ReadData.read_and_decode_single_example(["trian.tfrecords"], 2, 128*128*3)
label = tf.argmax(label)
image = tf.cast(image, tf.float32)

# groups examples into batches randomly
image = tf.reshape(image, shape=[128, 128, 3])
image = tf.multiply(image, 1.0/255, name="in_image")  # np.multiply(image, 1.0 / 255.0)

image_batch_ph = tf.placeholder(tf.float32, shape=[None, 128, 128, 3], name="image_batch_ph")
label_batch_ph = tf.placeholder(tf.int64, shape=[None], name="label_batch_ph")

images_batch, labels_batch = tf.train.shuffle_batch(
    [image, label], batch_size=32,
    capacity=2000,
    min_after_dequeue=1000, name="shuffle_step")

predict = inference(image_batch_ph)
y_pred = tf.nn.softmax(predict, name='y_pred')

loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label_batch_ph, logits=predict)
train_op = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(loss)
loss_mean = tf.reduce_mean(loss)

correct_prediction = tf.equal(tf.argmax(predict, 1), label_batch_ph)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

sess = tf.Session()
init = tf.global_variables_initializer()
saver = tf.train.Saver()
sess.run(init)

tf.train.start_queue_runners(sess=sess)

i = 0
while i < 10000:
    #labels = sess.run(labels_batch)
    #print labels
    imgs, lbs = sess.run([images_batch, labels_batch])
    _, lossMean = sess.run([train_op, loss_mean], feed_dict={image_batch_ph: imgs, label_batch_ph: lbs})
    if i % 1000 == 0:
        print "iteration ", i, "Loss :", lossMean
    if i % 2000 == 0:
        acc = sess.run([accuracy], feed_dict={image_batch_ph: imgs, label_batch_ph: lbs})
        print "iteration ", i, "accuracy :", acc
    i += 1

save_path = saver.save(sess, "./model/model.ckpt")
print("model is saved at %s", save_path)
"ReadData.read_and_decode_single_example()" function return a single image tensor and corresponding label tensor. The current session (trained model) is saved in the folder ./model
For testing, I wrote another script shown below. My intention is to load the session saved by above script and use the model to classify an image.
import cv2

image_size = 128
image = cv2.imread("./dog.11.jpg")
image = cv2.resize(image, (image_size, image_size), 0, 0, cv2.INTER_LINEAR)
image = tf.cast(image, tf.float32)
image = tf.reshape(image, shape=[128, 128, 3])
image = tf.multiply(image, 1.0/255, name="in_image")  # np.multiply(image, 1.0 / 255.0)
image = tf.expand_dims(image, 0)

sess = tf.Session()
new_saver = tf.train.import_meta_graph('./model/model.ckpt.meta')
new_saver.restore(sess, tf.train.latest_checkpoint('./model/'))

init = tf.global_variables_initializer()
sess.run(init)

image_batch_ph = tf.get_default_graph().get_tensor_by_name("image_batch_ph:0")
test_image = sess.run(image)

y_pred = tf.get_default_graph().get_tensor_by_name("y_pred:0")
predicted_labels = sess.run(y_pred, feed_dict={image_batch_ph: test_image})
print predicted_labels
While testing the model with the same image multiple times, the prediction results are different. I am not able to figure out what went wrong.
The mistake was mine. When using a pre-trained model from a checkpoint, you don't need:
init = tf.global_variables_initializer()
sess.run(init)
Running this after the restore re-initializes the weights with random new values.
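For reference, a minimal sketch of the corrected test-time flow (same graph and tensor names as in the test script above, with the initializer call simply removed so the restored weights are kept):

sess = tf.Session()
new_saver = tf.train.import_meta_graph('./model/model.ckpt.meta')
new_saver.restore(sess, tf.train.latest_checkpoint('./model/'))
# no tf.global_variables_initializer() here -- restore() has already set the weights

graph = tf.get_default_graph()
image_batch_ph = graph.get_tensor_by_name("image_batch_ph:0")
y_pred = graph.get_tensor_by_name("y_pred:0")

predicted_labels = sess.run(y_pred, feed_dict={image_batch_ph: test_image})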
I am training an RNN-based language model using TensorFlow. The model is very similar to the PTB model example in the TF tutorials section. However, when I attempt to train the model on my own data, the perplexity of the model does not go down; it remains constant throughout multiple epochs. Could anyone let me know what I might be doing wrong?
I have a feeling that I am not handling the targets properly, but the gist of my code for the targets is:
def batcher(batch_size, unroll_steps, data, pad):
    print(len(data))
    batches = len(data) / batch_size
    inp = []
    target = []
    for i in range(batches):
        #print(len(data[i*batch_size:(i+1)*batch_size]))
        x = data[i*batch_size:(i+1)*batch_size]
        y = [line[1:] + [pad] for line in x]
        yield (x, y)
That is, I just shift the data by 1 and use that as the target for the next word in a sentence.
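As a toy illustration of that shift (the word ids are made up, and <pad> is assumed to map to 0):

x = [[5, 7, 2, 9]]   # one input sentence as word ids
y = [[7, 2, 9, 0]]   # the same ids shifted left by one, with <pad> (0 here) appended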
The training script and model (class) are shown below.
Training script (excerpt):
def train(session, model, folder, batch_size, unroll_steps, epoch):
    word_to_id, id_to_word, train, val = build_inputs(folder, unroll_steps)
    pad = word_to_id['<pad>']

    costs = 0
    iters = 0

    train_size = len(train)
    batch_size = model.batch_size
    batches = train_size / batch_size

    state = session.run(model._initial_state)

    print("Running epoch %d" % epoch)
    for i in range(batches):
        fetches = [model.cost, model._final_state, model.logits]
        feed_dict = {}
        x = train[i*batch_size:(i+1)*batch_size]
        y = [line[1:] + [pad] for line in x]
        feed_dict[model.input] = x
        feed_dict[model.targets] = y
        feed_dict[model._initial_state] = state
        #print("Cell-state complete - Running")
        cost, state, logits = session.run(fetches, feed_dict)
        #print("Single Run complete")
        costs += cost
        iters += model.unroll_steps

    print("\tEpoch %d: Perplexity is %f" % (epoch, np.exp(costs/iters)))
    return np.exp(costs/iters)
Model:
import tensorflow as tf

class LM(object):
    def __init__(self, train, max_gradient, batch_size, unroll_steps, vocab, size, layers, learning_rate, init, prob):
        self.batch_size = batch_size
        self.max_gradient = max_gradient
        self.layers = layers
        self.learning_rate = learning_rate
        self.unroll_steps = unroll_steps
        self.init = init

        #with tf.name_scope("Paramters"):

        with tf.device('/gpu:0'), tf.name_scope("Input"):
            self.input = tf.placeholder(tf.int64, shape=[batch_size, unroll_steps], name="input")
            self.targets = tf.placeholder(tf.int64, shape=[batch_size, unroll_steps], name="targets")
            #self.init = tf.placeholder(tf.float32, shape=[], name="init")

        with tf.device('/gpu:0'), tf.name_scope("Embedding"):
            embedding = tf.Variable(tf.random_uniform([vocab, size], -self.init, self.init), dtype=tf.float32, name="embedding")
            embedded_input = tf.nn.embedding_lookup(embedding, self.input, name="embedded_input")

        with tf.device('/gpu:0'), tf.name_scope("RNN"), tf.variable_scope(tf.get_variable_scope(), reuse=False) as scope:
            lstm_cell = tf.contrib.rnn.BasicLSTMCell(size, forget_bias=0.0, state_is_tuple=True)
            if train and prob < 1.0:
                lstm_cell = tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=prob)
            cell = tf.contrib.rnn.MultiRNNCell([lstm_cell for _ in range(layers)], state_is_tuple=True)

            self._initial_state = cell.zero_state(batch_size, tf.float32)

            outputs = []
            state = self._initial_state
            for step in range(unroll_steps):
                if step > 0: tf.get_variable_scope().reuse_variables()
                (cell_output, state) = cell(embedded_input[:, step, :], state)
                outputs.append(cell_output)

        with tf.device('/gpu:0'), tf.name_scope("Cost"), tf.variable_scope(tf.get_variable_scope(), reuse=False) as scope:
            output = tf.reshape(tf.concat(outputs, 1), [-1, size])
            softmax_w = tf.get_variable("softmax_w", [size, vocab], dtype=tf.float32)
            softmax_b = tf.get_variable("softmax_b", [vocab], dtype=tf.float32)
            logits = tf.matmul(output, softmax_w) + softmax_b

            losses = []
            for logit, target in zip([logits], [tf.reshape(self.targets, [-1])]):
                target = tf.reshape(target, [-1])
                loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logit, labels=target)
                losses.append(loss)
            self.cost = tf.reduce_sum(losses) / batch_size

            self._final_state = state
            self.logits = logits
            scope.reuse_variables()

        if not train:
            return

        with tf.device('/gpu:0'), tf.name_scope("Train"), tf.variable_scope(tf.get_variable_scope(), reuse=False):
            train_variables = tf.trainable_variables()
            gradients, _ = tf.clip_by_global_norm(tf.gradients(self.cost, train_variables), self.max_gradient)
            optimizer = tf.train.AdamOptimizer(self.learning_rate)
            self.training = optimizer.apply_gradients(zip(gradients, train_variables))
            tf.get_variable_scope().reuse_variables()
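One observation on the excerpt above, offered as a guess rather than a confirmed fix: train() fetches model.cost, model._final_state and model.logits but never runs model.training, so the Adam update is never applied and the weights (and hence the perplexity) stay where they started. A minimal change inside the batch loop would be:

# also fetch the training op so the weights actually update
fetches = [model.cost, model._final_state, model.logits, model.training]
cost, state, logits, _ = session.run(fetches, feed_dict)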