Gradient = nan when using DenseVariational layer - tensorflow

I work with binary data (called mimic) and I want to build a Bayesian model that reproduces this data. To do so, I define this model:
def prior(kernel_size, bias_size, dtype=None):
    n = kernel_size + bias_size
    prior_model = tf.keras.Sequential(
        [
            tfp.layers.DistributionLambda(
                lambda t: tfp.distributions.MultivariateNormalDiag(
                    loc=tf.zeros(n), scale_diag=tf.ones(n)
                )
            )
        ]
    )
    return prior_model

def posterior(kernel_size, bias_size, dtype=None):
    n = kernel_size + bias_size
    posterior_model = tf.keras.Sequential(
        [
            tfp.layers.VariableLayer(
                tfp.layers.MultivariateNormalTriL.params_size(n), dtype=dtype
            ),
            tfp.layers.MultivariateNormalTriL(n),
        ]
    )
    return posterior_model

model = tf.keras.Sequential([
    tfkl.Input(shape=(), name='dummy_input'),
    tfpl.DistributionLambda(
        lambda t: latentNormal,
        convert_to_tensor_fn=lambda x: x.sample(batchSize)
    ),
    tfp.layers.DenseVariational(units=inputDim, make_prior_fn=prior,
                                make_posterior_fn=posterior,
                                activation="sigmoid", use_bias=False),
    tfpl.DistributionLambda(lambda t: tfd.Bernoulli(probs=t))
])
Then I train the model:
negloglik = lambda data: -model(69).log_prob(data)
optimizer = tf.keras.optimizers.Adam()
loo = []
kls = []
for epoch in trange(100):
    # model.fit(mimic[:1453*32], mimic[:1453*32], epochs=1, batch_size=batchSize, verbose=0)
    idx = np.random.choice(np.arange(mimic.shape[0]), size=3*batchSize, replace=False)
    shuffled_ds = mimic.numpy()[idx]
    for nBatch in range(3):
        batch = shuffled_ds[nBatch*batchSize:(1+nBatch)*batchSize]
        with tf.GradientTape() as tape:
            tape.watch(model.trainable_variables)
            loss = negloglik(batch)
        loo.append(loss)
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
    kl = tf.reduce_mean(tfd.kl_divergence(model(0), real_dist))
    kls.append(kl.numpy())
More precisely, when I run for one epoch and one batch, the model's weights are full of NaN and the gradients are also NaN.
Do you have any idea how I can solve this, please?
I tried replacing the DenseVariational layer with a plain Dense layer and everything works fine. I don't understand why DenseVariational is a problem here.
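For reference, the snippet relies on several names that are not defined in the post (tfkl, tfpl, tfd, batchSize, inputDim, latentNormal, mimic, real_dist). A minimal sketch, purely with assumed placeholder values, of how they could be set up so the example runs (real_dist, the reference distribution used in the KL monitoring, is omitted because its definition is not given):

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
from tqdm import trange

tfkl = tf.keras.layers
tfpl = tfp.layers
tfd = tfp.distributions

# Placeholder values; the real ones are not given in the question.
batchSize = 32
inputDim = 30                      # number of binary features in `mimic`
latentDim = 2
latentNormal = tfd.Independent(
    tfd.Normal(loc=tf.zeros(latentDim), scale=1.0),
    reinterpreted_batch_ndims=1)
# Dummy binary dataset standing in for the real `mimic` data.
mimic = tf.constant(
    np.random.randint(0, 2, size=(1000, inputDim)), dtype=tf.float32)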

Related

Why can't my chemical VAE learn anything with a toy dataset?

I'm trying to implement a mini version of the chemical VAE described in this paper: 10.1021/acscentsci.7b00572. The model trains successfully and the loss changes, but the predicted properties of all samples are the same, close to the mean value, and the autoencoder cannot reconstruct the input data. In other words, the model does not learn anything during training. I have carefully checked my code but could not find anything wrong. Can anyone help? Thank you.
Here is my code:
import numpy as np
import tensorflow as tf
# example smiles and properties
smiles = ['CCCCO', 'C1CCCCC1', 'C[C@@H](C(=O)O)N', 'C[C@H](C(=O)O)N', 'CC(=O)O'] * 200
y = [1,2,3,4,5] * 200
# smiles to one-hot
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
dicts = set(''.join(smiles))
num_words = len(dicts) + 1
max_lens = 15
tokenizer = Tokenizer(num_words=num_words, char_level=True)
tokenizer.fit_on_texts(smiles)
sequences = tokenizer.texts_to_sequences(smiles)
sequences = pad_sequences(sequences, maxlen = max_lens, padding='post', truncating='post')
x = to_categorical(sequences, num_classes=num_words)
# model
from tensorflow.keras import layers, Model
class VAEWithRegressor(Model):
    """Combines a variational autoencoder with a property regressor."""
    def __init__(self, latent_dim):
        super(VAEWithRegressor, self).__init__()
        # Define the encoder layers
        self.encoder = tf.keras.Sequential(
            [
                layers.InputLayer(input_shape=x[0].shape),
                layers.GRU(units=64, return_sequences=True),
                layers.BatchNormalization(),
                layers.GRU(units=32),
                layers.BatchNormalization(),
                layers.Dense(units=16),
                layers.BatchNormalization(),
                layers.Dense(latent_dim * 2),
            ]
        )
        # Define the decoder layers
        self.decoder = tf.keras.Sequential(
            [
                layers.InputLayer(input_shape=(latent_dim,)),
                layers.Dense(units=16),
                layers.BatchNormalization(),
                layers.Dense(units=32),
                layers.BatchNormalization(),
                layers.RepeatVector(max_lens),
                layers.GRU(units=max_lens, return_sequences=True),
                layers.BatchNormalization(),
                layers.TimeDistributed(layers.Dense(units=num_words)),
                layers.Activation('softmax')
            ]
        )
        # Define the regressor layers
        self.regressor = tf.keras.Sequential(
            [
                layers.InputLayer(input_shape=(latent_dim,)),
                layers.Dense(units=32),
                layers.Dense(units=16),
                layers.Dense(units=1),
            ]
        )

    def encode(self, x):
        # Compute the mean and log variance of the latent variable
        h = self.encoder(x)
        mean, log_var = tf.split(h, num_or_size_splits=2, axis=1)
        return mean, log_var

    def reparameterize(self, mean, log_var):
        # Sample from the latent variable distribution
        eps = tf.random.normal(tf.shape(mean))
        std_dev = tf.exp(0.5 * log_var)
        z = mean + std_dev * eps
        return z

    def decode(self, z):
        # Reconstruct the input from the latent variable
        return self.decoder(z)

    def predict_properties(self, z):
        # Predict the properties of the input
        return self.regressor(z)

    def call(self, x):
        # Define the forward pass of the model
        mean, log_var = self.encode(x)
        z = self.reparameterize(mean, log_var)
        x_pred = self.decode(z)
        properties = self.predict_properties(z)
        return x_pred, mean, log_var, properties

    def vae_loss(self, x, x_pred, mean, log_var):
        recon_loss = tf.reduce_sum(tf.keras.losses.binary_crossentropy(x, x_pred), axis=1)
        kl_loss = -0.5 * tf.reduce_sum(1 + log_var - tf.square(mean) - tf.exp(log_var), axis=1)
        return tf.reduce_mean(recon_loss + kl_loss)

    def property_loss(self, y_true, y_pred):
        # Compute the mean squared error between the true and predicted properties
        return tf.reduce_mean(tf.keras.losses.mean_squared_error(y_true, y_pred))

    def train_step(self, x, y_true):
        with tf.GradientTape() as tape:
            x_pred, mean, log_var, y_pred = self.call(x)
            vae_loss_value = self.vae_loss(x, x_pred, mean, log_var)
            property_loss_value = self.property_loss(y_true, y_pred)
            total_loss = vae_loss_value + property_loss_value
        optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
        gradients = tape.gradient(total_loss, self.trainable_variables)
        optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        return vae_loss_value, property_loss_value

latent_dim = 8
num_epochs = 50
batch_size = 256
vae = VAEWithRegressor(latent_dim)
x_train = x
y_train = y
for epoch in range(num_epochs):
    epoch_vae_loss = 0
    epoch_property_loss = 0
    for i in range(0, len(x_train), batch_size):
        x_batch = x_train[i:i+batch_size]
        y_batch = y_train[i:i+batch_size]
        vae_loss_value, property_loss_value = vae.train_step(x_batch, y_batch)
        epoch_vae_loss += vae_loss_value
        epoch_property_loss += property_loss_value
    epoch_vae_loss /= (len(x_train) / batch_size)
    epoch_property_loss /= (len(x_train) / batch_size)
    print('Epoch {}, VAE loss: {}, Property loss: {}'.format(epoch+1, epoch_vae_loss, epoch_property_loss))
z_sample = vae.encoder.predict(x)[:, :latent_dim]
x_pred = np.array(vae.decoder.predict(z_sample))
y_pred = np.array(vae.predict_properties(z_sample))
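One detail worth noting in train_step above, as a sketch of an alternative and not necessarily the cause of the symptom: a fresh Adam optimizer is created on every call, so its moment estimates never accumulate across steps. A variant that shares one optimizer (shown outside the class so the original code stays unchanged) could look like this:

# Sketch only: reuse a single optimizer across all training steps.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

def shared_optimizer_train_step(model, x_batch, y_batch):
    with tf.GradientTape() as tape:
        x_pred, mean, log_var, y_pred = model(x_batch)
        total_loss = (model.vae_loss(x_batch, x_pred, mean, log_var)
                      + model.property_loss(y_batch, y_pred))
    gradients = tape.gradient(total_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return total_loss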

How to select only some trainable variables from an NN model to minimize with the SciPy L_BFGS_B optimizer?

I'm implementing a physics-informed neural network (PINN) model to solve the Navier-Stokes equations, as in PINN. This type of model works better with L-BFGS-B, and the best optimizer for my case is fmin_l_bfgs_b from SciPy.
The problem is that this optimizer does not work directly with the TensorFlow library. To use it with TensorFlow, I implemented a class L_BFGS_B with the following methods:
set_weights: set the weights on the model
evaluate: evaluate loss and gradients
tf_evaluate: evaluate loss and gradients as tf.Tensor
fit: train the model
Everything works fine. The optimizer trains all the weights of the model, but the problem is that I only want to train two out of the 18 trainable variables.
Optimizer class:
class L_BFGS_B:
    def __init__(self, model, x_train, y_train, factr=1, m=50, maxls=50, maxfun=50000, maxiter=50000):
        self.model = model
        # x_train = xyt, y_train = uv
        self.x_train = x_train  # tf.constant(x_train, dtype=tf.float32)
        self.y_train = y_train  # tf.constant(y_train, dtype=tf.float32)
        # tolerance that decides when the iteration stops
        self.factr = factr
        # the maximum number of variable metric corrections used
        self.m = m
        # max number of line search steps per iteration (here 50 per iteration)
        self.maxls = maxls
        # max number of iterations
        self.maxiter = maxiter
        self.maxfun = maxfun

    @tf.function
    def tf_evaluate(self, x, y):
        """
        Evaluate loss and gradients for weights as tf.Tensor.
        Args:
            x: input data.
        Returns:
            loss and gradients for weights as tf.Tensor.
        """
        # where x = xyt, y = uv
        with tf.GradientTape() as g:
            uv_fuv = self.model([x, y])
            loss = self.model.losses[0]
        grads = g.gradient(loss, self.model.trainable_variables, unconnected_gradients=tf.UnconnectedGradients.ZERO)
        return loss, grads

    def set_weights(self, flat_weights):
        """
        Set weights to the model.
        Args:
            flat_weights: flattened weights.
        """
        weights_shapes = [w.shape for w in self.model.get_weights()]
        n = [0] + [np.prod(shape) for shape in weights_shapes]
        partition = np.cumsum(n)
        weights = [flat_weights[from_part:to_part].reshape(shape)
                   for from_part, to_part, shape
                   in zip(partition[:-1], partition[1:], weights_shapes)]
        self.model.set_weights(weights)

    def evaluate(self, flat_weights):
        """
        Evaluate loss and gradients for weights as ndarray.
        Args:
            weights: flattened weights.
        Returns:
            loss and gradients for weights as ndarray.
        """
        self.set_weights(flat_weights)
        loss, grads = self.tf_evaluate(self.x_train, self.y_train)
        loss = loss.numpy().astype('float64')
        grads = np.concatenate([g.numpy().flatten() for g in grads]).astype('float64')
        # printest('loss', loss)
        return loss, grads

    def fit(self):
        """
        Train the model using the L-BFGS-B algorithm.
        """
        # Flatten initial weights
        initial_weights = np.concatenate([w.flatten() for w in self.model.get_weights()])
        # optimizer
        fmin_l_bfgs_b(func=self.evaluate, x0=initial_weights,
                      factr=self.factr, m=self.m,
                      maxls=self.maxls, maxiter=self.maxiter,
                      maxfun=self.maxfun)

if __name__ == "__main__":
    ...
    # load data
    ...
    indices = np.random.choice(N*T, n_train, replace=False)
    xyt_train = tf.concat((x_1d[indices], y_1d[indices], t_1d[indices]), axis=1)
    uv_train = tf.concat((u_1d[indices], v_1d[indices]), axis=1)
    # Model
    nn_model = NeuralNet().build()
    pinn_model = PhysicsInformedNN(model=nn_model).build()
    # Optimizer
    lbfgs = L_BFGS_B(model=pinn_model, x_train=xyt_train, y_train=uv_train)
    lbfgs.fit()
Attempt
Use args in fmin_l_bfgs_b, where args receives the trainable variables that I want to keep fixed and x0 receives the initial two variables to be minimized. The following code is only a sanity test to see whether passing the weights this way works.
def evaluate(self, weights_var, *args):
    weights = np.append(weights_var, args)
    self.set_weights(weights)
    loss, grads = self.tf_evaluate(self.x_train, self.y_train)
    loss = loss.numpy().astype('float64')
    grads = np.concatenate([g.numpy().flatten() for g in grads]).astype('float64')
    # printest('loss', loss)
    return loss, grads

def fit(self):
    """
    Train the model using the L-BFGS-B algorithm.
    """
    # Flatten the weights: the fixed part and the part to be optimized
    weights_fixed = np.concatenate([w.flatten() for w in self.model.get_weights()[2:]])
    weights_var = np.concatenate([w.flatten() for w in self.model.get_weights()[0:2]])
    # optimizer
    fmin_l_bfgs_b(func=self.evaluate, x0=weights_var, args=(weights_fixed,),
                  factr=self.factr, m=self.m,
                  maxls=self.maxls, maxiter=self.maxiter,
                  maxfun=self.maxfun)
Unfortunately, the following error is raised: 0-th dimension must be fixed to 2 but got 2644.
Question: Is there a way to keep fixed the trainable variables that I do not want to minimize, optimize only the remaining ones, and finally set them back into the neural network model, using this type of optimizer?
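For what it's worth, here is a minimal sketch of the idea the attempt is aiming at, under the assumption that the two variables to optimize are the first two entries returned by model.get_weights() and everything after them stays fixed: evaluate rebuilds the full flat vector from the optimized slice plus the fixed slice, and returns only the gradient entries belonging to x0, so the gradient length matches len(x0):

# Hypothetical replacement methods for the L_BFGS_B class above (sketch only).
def evaluate(self, weights_var, weights_fixed):
    # Rebuild the full flattened weight vector: optimized slice first, fixed slice after.
    self.set_weights(np.concatenate([weights_var, weights_fixed]))
    loss, grads = self.tf_evaluate(self.x_train, self.y_train)
    flat_grads = np.concatenate([g.numpy().flatten() for g in grads]).astype('float64')
    # Return gradients only for the optimized slice so len(grad) == len(x0).
    return loss.numpy().astype('float64'), flat_grads[:len(weights_var)]

def fit(self):
    weights = self.model.get_weights()
    weights_var = np.concatenate([w.flatten() for w in weights[0:2]])
    weights_fixed = np.concatenate([w.flatten() for w in weights[2:]])
    fmin_l_bfgs_b(func=self.evaluate, x0=weights_var, args=(weights_fixed,),
                  factr=self.factr, m=self.m, maxls=self.maxls,
                  maxiter=self.maxiter, maxfun=self.maxfun)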

tfr.keras.losses.ListMLELoss() is always 0 during training, validation, and testing

I have built a ranking model using tensorflow_ranking losses and metrics, but ListMLELoss() is always 0. The model trains and completes, but I imagine no learning is actually happening since the loss is never non-zero. I tried to follow this guide, https://www.tensorflow.org/recommenders/examples/listwise_ranking, as closely as I could, but my use case differs in a few ways. I am not sure why model.fit() runs and I get an NDCG value when the model clearly cannot be learning, as no loss value is being computed.
Here is my ranking model class:
class RankingModel(tf.keras.Model):
    def __init__(self, embeddings, vocab_size_dict, dim_dict, loss, activation='sigmoid'):
        super().__init__()
        self.embeddings = embeddings
        self.embedding_layers = {}
        self.vocab_size_dict = vocab_size_dict
        self.dim_dict = dim_dict
        self.activation = activation
        self.loss = loss
        self.embedding_layers['feature_one'] = tf.keras.layers.Embedding(
            self.vocab_size_dict['feature_one']+1,
            self.dim_dict['feature_one'],
            name='embedded_feature_one')
        self.embedding_layers['feature_two'] = tf.keras.layers.Embedding(
            self.vocab_size_dict['feature_two']+1,
            self.dim_dict['feature_two'],
            name='embedded_feature_two')
        self.embedding_layers['feature_three'] = tf.keras.layers.Embedding(
            self.vocab_size_dict['feature_three']+1,
            self.dim_dict['feature_three'],
            name='embedded_feature_three')
        self.embedding_layers['feature_four'] = tf.keras.layers.Embedding(
            self.vocab_size_dict['feature_four']+1,
            self.dim_dict['feature_four'],
            name='embedded_feature_four')
        self.embedding_layers['feature_five'] = tf.keras.layers.Embedding(
            self.vocab_size_dict['feature_five']+1,
            self.dim_dict['feature_five'],
            name='embedded_feature_five')
        self.flatten = tf.keras.layers.Flatten()
        self.concatenate = tf.keras.layers.Concatenate(axis=1, name='Input_Concatenation')
        self.batchnorm = tf.keras.layers.BatchNormalization(name='batchnorm')
        self.score_model = tf.keras.Sequential([
            tf.keras.layers.Dense(64, activation='leaky_relu'),
            tf.keras.layers.Dense(12, activation=activation)
        ])
        self.task = tfrs.tasks.Ranking(
            loss=self.loss,
            metrics=[
                tfr.keras.metrics.NDCGMetric(name="ndcg_metric")
            ]
        )

    def __call__(self, features, training=False):
        feats = []
        for feat, tens in features[0].items():
            if feat in self.embeddings:
                embedding = self.embedding_layers[feat](tens)
                flatten = self.flatten(embedding)
                feats.append(flatten)
            if feat == 'continuous':
                flatten = self.flatten(tens)
                feats.append(flatten)
        deep_concatenated = self.concatenate(feats)
        batchnorm = self.batchnorm(deep_concatenated)
        scores = self.score_model(batchnorm)
        print("scores: ", scores)
        print("mask: ", features[0]['mask'])
        masked_scores = tf.boolean_mask(scores, features[0]['mask'])
        # pred = tf.expand_dims(masked_scores, axis=1)
        # return pred
        return tf.expand_dims(masked_scores, axis=1)

    def compute_loss(self, features, training=False):
        labels = features[1]
        # print("labels: ", labels)
        # print("mask: ", features[0]['mask'])
        masked_labels = tf.boolean_mask(labels, features[0]['mask'])
        # print("masked labels:", masked_labels)
        masked_labels = tf.expand_dims(masked_labels, axis=1)
        print("masked_labels: ", masked_labels)
        scores = self(features)
        print("scores: ", scores)
        print("loss: ", self.task(labels=masked_labels, predictions=scores))
        return self.task(
            labels=masked_labels,
            predictions=scores
        )

    def train_step(self, inputs):
        """Custom train step using the `compute_loss` method."""
        with tf.GradientTape() as tape:
            loss = self.compute_loss(inputs)
            # Handle regularization losses as well.
            regularization_loss = sum(self.losses)
            total_loss = loss + regularization_loss
        gradients = tape.gradient(total_loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        metrics = {metric.name: metric.result() for metric in self.metrics}
        metrics["loss"] = loss
        metrics["regularization_loss"] = regularization_loss
        metrics["total_loss"] = total_loss
        return metrics

    def test_step(self, inputs):
        """Custom test step using the `compute_loss` method."""
        loss = self.compute_loss(inputs)
        # Handle regularization losses as well.
        regularization_loss = sum(self.losses)
        total_loss = loss + regularization_loss
        metrics = {metric.name: metric.result() for metric in self.metrics}
        metrics["loss"] = loss
        metrics["regularization_loss"] = regularization_loss
        metrics["total_loss"] = total_loss
        return metrics
Can anybody see why I am not getting a loss value? Thanks a lot. Please let me know if you need additional info; maybe I can create some synthetic data so you can run it all yourself. I've been pulling my hair out for a few days trying to get this to work, so any advice is much appreciated.
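In case it helps with debugging, here is a small standalone check of my own (assuming tensorflow_ranking is importable as tfr): ListMLELoss operates on labels and scores shaped [batch_size, list_size], and a list of length one always produces a loss of exactly zero, which may be worth comparing with the shapes produced by tf.expand_dims(masked_scores, axis=1) above:

import tensorflow as tf
import tensorflow_ranking as tfr

loss_fn = tfr.keras.losses.ListMLELoss()

# A list of length 1: the Plackett-Luce likelihood of a single item is 1,
# so the ListMLE loss is identically zero.
print(loss_fn(tf.constant([[3.0]]), tf.constant([[0.7]])).numpy())        # 0.0

# A genuine list dimension ([batch_size, list_size]) gives a non-zero loss.
print(loss_fn(tf.constant([[3.0, 1.0, 2.0]]),
              tf.constant([[0.2, 0.9, 0.1]])).numpy())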

In Tensorflow, how do you include a pre-trained model in a custom model?

I've got a custom model that I've pre-trained in a separate notebook called vae which I've saved using vae.save().
I'm now looking to implement a model which is a full training pipeline containing the pre-trained vae and some other components (including a ResNet-50 from the TensorFlow model gallery).
The TensorFlow documentation on Making new Layers and Models via subclassing doesn't mention anything about including models in subclassed models.
My question is how do I include my pre-trained VAE in a new subclassed model?
I've already tried writing the code below, which works fine for training, but I haven't seen implementations like it elsewhere and I'm getting errors when trying to do reid.save() or reid.summary() on the model, so I presume there is a better way to do it:
class ReId(keras.Model):
    def __init__(self, vae, num_ids, **kwargs):
        super(ReId, self).__init__(**kwargs)
        self.vae = vae
        self.convex_combination = ConvexCombination()
        self.resnet_50 = resnet50
        self.glob_avg_pool = keras.layers.GlobalAveragePooling2D()
        self.bnneck = keras.layers.BatchNormalization()
        self.num_ids = num_ids
        self.final_fc_layer = keras.layers.Dense(self.num_ids)
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.vae_loss_tracker = keras.metrics.Mean(
            name="vae_loss"
        )
        self.classification_loss_tracker = keras.metrics.Mean(name="classification_loss")
        self.triplet_loss_tracker = keras.metrics.Mean(name="triplet_loss")

    @property
    def metrics(self):
        return [
            self.total_loss_tracker,
            self.vae_loss_tracker,
            # Classification loss is cross-entropy loss here
            self.classification_loss_tracker,
            self.triplet_loss_tracker,
        ]

    def train_step(self, data):
        x, y = data
        y = tf.cast(y, tf.int32)
        with tf.GradientTape() as tape:
            # Calculate the VAE loss
            z_mean, z_log_var, z = self.vae.encoder(x)
            reconstruction = self.vae.decoder(z)
            reconstruction_loss = tf.reduce_mean(
                keras.losses.mean_squared_error(x, reconstruction)
            )
            kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
            kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
            vae_loss = reconstruction_loss + (LAMBDA * kl_loss)
            embedding_output = self.call(x)
            bnneck_output = self.bnneck(embedding_output)
            training_output = self.final_fc_layer(bnneck_output)
            # Calculate triplet loss
            triplet_loss = tfa.losses.triplet_semihard_loss(y_true=y, y_pred=embedding_output)
            # Calculate cross-entropy loss
            ce_loss = keras.losses.sparse_categorical_crossentropy(y, training_output)
            # Sum up the losses
            total_loss = triplet_loss + ce_loss + (TOTAL_LOSS_VAE_MULTIPLIER * vae_loss)
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.classification_loss_tracker.update_state(ce_loss)
        self.triplet_loss_tracker.update_state(triplet_loss)
        self.vae_loss_tracker.update_state(vae_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "classification loss:": self.classification_loss_tracker.result(),
            "triplet loss:": self.triplet_loss_tracker.result(),
            "vae loss:": self.vae_loss_tracker.result(),
        }

    def get_config(self):
        config = super(ReId, self).get_config()
        config.update({"num_ids": self.num_ids})
        return config

    def call(self, data):
        recon_img = self.vae(data)
        convex_combo_input = [data, (data - recon_img)]
        convex_combo_output = self.convex_combination(convex_combo_input)
        final_output = self.glob_avg_pool(self.resnet_50(convex_combo_output))
        return final_output
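For what it's worth, here is a rough usage sketch (everything in it is an assumption: the input size, the number of identities, and that resnet50 and the custom ConvexCombination layer from the snippet are in scope). A subclassed model generally needs to be built, for example by calling it once on a dummy batch, before summary() can report anything:

# Usage sketch only; 256x128x3 inputs and 100 identities are assumed values.
vae = keras.models.load_model("vae")           # the VAE saved earlier with vae.save();
                                               # may need custom_objects for custom layers
resnet50 = keras.applications.ResNet50(include_top=False, weights="imagenet")

reid = ReId(vae=vae, num_ids=100)
_ = reid(tf.zeros((1, 256, 128, 3)))           # one forward pass builds the layers
reid.summary()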

How to generate a sample sentence with LSTM model in Tensorflow?

I'm working with the LSTM model in Tensorflow.
I have already trained and saved the LSTM model. Now I'm at the last task: generating sentences.
Here is my pseudo code:
# We already have the run_epoch(session, m, data, eval_op, verbose=False) function with a feed_dict like this:
feed_dict = {m.input_data: x,
             m.targets: y,
             m.initial_state: state}
...
# train and save model
...
# load saved model for the generating task
new_sentence = [START_TOKEN]
# Here I want to generate a sentence until END_TOKEN is generated.
while new_sentence[-1] != END_TOKEN:
    logits = get_logits(model, new_sentence)
    # get argmax(logits) or sample(logits)
    next_word = argmax(logits)
    new_sentence.append(next_word)
print(new_sentence)
My question is:
When training, validating, or testing the model, I have to feed both the inputs and their labels (the inputs shifted by one) into the model via the feed_dict dictionary. But in the generating task, I have only one input, which is the sentence being generated, new_sentence.
How can I build the right get_logits function, or a full generate function?
When you train, you have an output of the neural network; based on that output you calculate the error, and based on the error you create the optimizer to minimize it.
In order to generate a new sentence, you just need the output of the neural network (the RNN).
Edited:
"""
Placeholders
"""
x = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder')
y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placeholder')
init_state = tf.zeros([batch_size, state_size])
"""
RNN Inputs
"""
# Turn our x placeholder into a list of one-hot tensors:
# rnn_inputs is a list of num_steps tensors with shape [batch_size, num_classes]
x_one_hot = tf.one_hot(x, num_classes)
rnn_inputs = tf.unpack(x_one_hot, axis=1)
"""
Definition of rnn_cell
This is very similar to the __call__ method on Tensorflow's BasicRNNCell. See:
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/rnn_cell.py
"""
with tf.variable_scope('rnn_cell'):
W = tf.get_variable('W', [num_classes + state_size, state_size])
b = tf.get_variable('b', [state_size], initializer=tf.constant_initializer(0.0))
def rnn_cell(rnn_input, state):
with tf.variable_scope('rnn_cell', reuse=True):
W = tf.get_variable('W', [num_classes + state_size, state_size])
b = tf.get_variable('b', [state_size], initializer=tf.constant_initializer(0.0))
return tf.tanh(tf.matmul(tf.concat(1, [rnn_input, state]), W) + b)
state = init_state
rnn_outputs = []
for rnn_input in rnn_inputs:
state = rnn_cell(rnn_input, state)
rnn_outputs.append(state)
final_state = rnn_outputs[-1]
#logits and predictions
with tf.variable_scope('softmax'):
W = tf.get_variable('W', [state_size, num_classes])
b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))
logits = [tf.matmul(rnn_output, W) + b for rnn_output in rnn_outputs]
predictions = [tf.nn.softmax(logit) for logit in logits]
# Turn our y placeholder into a list labels
y_as_list = [tf.squeeze(i, squeeze_dims=[1]) for i in tf.split(1, num_steps, y)]
#losses and train_step
losses = [tf.nn.sparse_softmax_cross_entropy_with_logits(logit,label) for \
logit, label in zip(logits, y_as_list)]
total_loss = tf.reduce_mean(losses)
train_step = tf.train.AdagradOptimizer(learning_rate).minimize(total_loss)
def train():
with tf.Session() as sess:
#load the model
training_losses = []
for idx, epoch in enumerate(gen_epochs(num_epochs, num_steps)):
training_loss = 0
training_state = np.zeros((batch_size, state_size))
if verbose:
print("\nEPOCH", idx)
for step, (X, Y) in enumerate(epoch):
tr_losses, training_loss_, training_state, _ = \
sess.run([losses,
total_loss,
final_state,
train_step],
feed_dict={x:X, y:Y, init_state:training_state})
training_loss += training_loss_
if step % 100 == 0 and step > 0:
if verbose:
print("Average loss at step", step,
"for last 250 steps:", training_loss/100)
training_losses.append(training_loss/100)
training_loss = 0
#save the model
def generate_seq():
with tf.Session() as sess:
#load the model
# load saved model for generating task
new_sentence = [START_TOKEN]
# Here I want to generate a sentence until END_TOKEN is generated.
while new_sentence[-1] != END_TOKEN:
logits = sess.run(final_state,{x:np.asarray([new_sentence])})
# get argmax(logits) or sample(logits)
next_word = argmax(logits[0])
new_sentence.append(next_word)
print(new_sentence)
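To make the last step concrete, here is a sketch of my own (assuming a second copy of the graph is built with batch_size=1 and num_steps=1, and that the trained weights are restored into it): each iteration feeds the last generated word, runs the softmax predictions instead of the hidden state, and carries the returned state forward.

def generate_seq(max_len=30):
    with tf.Session() as sess:
        # restore the trained weights here, e.g. with a tf.train.Saver
        sentence = [START_TOKEN]
        state = np.zeros((1, state_size))
        while sentence[-1] != END_TOKEN and len(sentence) < max_len:
            # predictions[0] is the softmax over the vocabulary for the single fed step
            probs, state = sess.run(
                [predictions[0], final_state],
                feed_dict={x: [[sentence[-1]]], init_state: state})
            sentence.append(int(np.argmax(probs[0])))   # or sample from probs[0]
        print(sentence)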