How select only some trainable variables from NN model to minimize with SciPy L_BFGS_B optimizer? - optimization

I'm implementing a physical informed neural network (PINN) model to solve the Navier-Stokes equation, as in PINN. This type of model works better when using L_BFGS_B, and the better optimizer for my case is the fmin_l_bfgs_b from SciPy.
The problem with this optimizer is that they do not work directly with the TensorFlow library. To work with TensorFlow, I implement a class L_BFGS_B with the following methods.
set_weights: Set weights to the model.:
evaluate: evaluate loss and gradients
tf_evaluate: Evaluate loss and gradients as tf.tensor
fit: Train the model
All works fine. The optimizer is training all weights of the model, but the problem is that I only want to train two out of 18 trainable variables.
**Optimizer class **
class L_BFGS_B:
def __init__(self, model, x_train, y_train, factr = 1, m=50, maxls=50,maxfun = 50000, maxiter=50000):
self.model = model
#x_train = xyt, y_train = uv
self.x_train = x_train #tf.constant(x_train, dtype=tf.float32)
self.y_train = y_train #tf.constant(y_train, dtype=tf.float32)
# quando iteração termina
self.factr = factr
#The maximum number of variable metric corrections used
self.m = m
#max number of line search steps/iteration
# nesse caso 50/iteração
self.maxls = maxls
#max number of interation
self.maxiter = maxiter
self.maxfun = maxfun
#tf.function
def tf_evaluate(self, x, y):
"""
Evaluate loss and gradients for weights as tf.Tensor.
Args:
x: input data.
Returns:
loss and gradients for weights as tf.Tensor.
"""
# wehre x = xyt , y = uv
with tf.GradientTape() as g:
uv_fuv = self.model([x, y])
loss = self.model.losses[0]
grads = g.gradient(loss, self.model.trainable_variables, unconnected_gradients=tf.UnconnectedGradients.ZERO)
return loss, grads
def set_weights(self, flat_weights):
"""
Set weights to the model.
Args:
flat_weights: flatten weights.
"""
weights_shapes = [ w.shape for w in self.model.get_weights() ]
n = [0] + [ np.prod(shape) for shape in weights_shapes ]
partition = np.cumsum(n)
weights = [ flat_weights[from_part:to_part].reshape(shape)
for from_part, to_part, shape
in zip(partition[:-1], partition[1:], weights_shapes) ]
self.model.set_weights(weights)
def evaluate(self, flat_weights):
"""
Evaluate loss and gradients for weights as ndarray.
Args:
weights: flatten weights.
Returns:
loss and gradients for weights as ndarray.
"""
self.set_weights(flat_weights)
loss, grads = self.tf_evaluate(self.x_train, self.y_train)
loss = loss.numpy().astype('float64')
grads = np.concatenate([ g.numpy().flatten() for g in grads ]).astype('float64')
#printest('loss', loss)
return loss, grads
def fit(self):
"""
Train the model using L-BFGS-B algorithm.
"""
# Flatten initial weights
initial_weights = np.concatenate([ w.flatten() for w in self.model.get_weights() ])
#optmizer
fmin_l_bfgs_b(func = self.evaluate, x0 = initial_weights,
factr = self.factr, m = self.m,
maxls = self.maxls, maxiter = self.maxiter,
maxfun = self.maxfun)
if __name__ == "__main__":
...
# load Data
...
indices = np.random.choice(N*T, n_train, replace = False)
xyt_train = tf.concat( (x_1d[indices], y_1d[indices], t_1d[indices]), axis = 1)
uv_train = tf.concat( (u_1d[indices], v_1d[indices]), axis = 1)
# Model
nn_model = NeuralNet().build()
pinn_model = PhysicsInformedNN(model = nn_model).build()
#Optimizer
lbfgs = L_BFGS_B(model = pinn_model, x_train = xyt_train, y_train = uv_train)
lbfgs.fit()
Attempt
Use arg in the fmin_l_bfgs_b, where args is passed as the trainable variables that I want to fix and **x0 ** the initial two variables to be minimized. The following code is only a sanity test to see if passing the weights in this way works.
def evaluate(self, weights_var, *args):
weights = np.append(weights_var, args)
self.set_weights(weights)
loss, grads = self.tf_evaluate(self.x_train, self.y_train)
loss = loss.numpy().astype('float64')
grads = np.concatenate([ g.numpy().flatten() for g in grads ]).astype('float64')
#printest('loss', loss)
return loss, grads
def fit(self):
"""
Train the model using L-BFGS-B algorithm.
"""
# Flatten initial weights
weights_fixed = np.concatenate([ w.flatten() for w in self.model.get_weights()[2:] ])
weights_var = np.concatenate([ w.flatten() for w in self.model.get_weights()[0:2] ])
#optmizer
fmin_l_bfgs_b(func = self.evaluate, x0 = initial_weights, args = (weights_fixed)
factr = self.factr, m = self.m,
maxls = self.maxls, maxiter = self.maxiter,
maxfun = self.maxfun)
Unfortunately, the following error is raised: 0-th dimension must be fixed to 2 but got 2644.
Question: There is a way to fix the trainable variables that I do not want to minimize, work with the ones that are not fixed, and in the final set back then to the neural network model using this type of optimizer?

Related

tfr.keras.losses.ListMLELoss() is always 0 during training, validation, and testing

I have made a ranking model using tensorflow_ranking losses and metrics, but the ListMLELoss() is always 0. The model will train and complete, but I imagine no learning is actually happening since the loss is not getting calculated. I tried to follow this guide, https://www.tensorflow.org/recommenders/examples/listwise_ranking, as well as I could, but there are some differences in use cases so it is a bit different. I am not sure why model.fit() runs and I get an NDCG value, but clearly the model cannot be learning as a loss value is not getting computed.
Here is my ranking model class:
class RankingModel(tf.keras.Model):
def __init__(self, embeddings, vocab_size_dict, dim_dict, loss, activation='sigmoid'):
super().__init__()
self.embeddings = embeddings
self.embedding_layers = {}
self.vocab_size_dict = vocab_size_dict
self.dim_dict = dim_dict
self.activation = activation
self.loss = loss
self.embedding_layers['feature_one'] = tf.keras.layers.Embedding(
self.vocab_size_dict['feature_one']+1,
self.dim_dict['feature_one'],
name='embedded_feature_one')
self.embedding_layers['feature_two'] = tf.keras.layers.Embedding(
self.vocab_size_dict['feature_two']+1,
self.dim_dict['feature_two'],
name='embedded_feature_two')
self.embedding_layers['feature_three'] = tf.keras.layers.Embedding(
self.vocab_size_dict['feature_three']+1,
self.dim_dict['feature_three'],
name='embedded_feature_three')
self.embedding_layers['feature_four'] = tf.keras.layers.Embedding(
self.vocab_size_dict['feature_four']+1,
self.dim_dict['feature_four'],
name='embedded_feature_four')
self.embedding_layers['feature_five'] = tf.keras.layers.Embedding(
self.vocab_size_dict['feature_five']+1,
self.dim_dict['feature_five'],
name='embedded_feature_five')
self.flatten = tf.keras.layers.Flatten()
self.concatenate = tf.keras.layers.Concatenate(axis=1, name='Input_Concatenation')
self.batchnorm = tf.keras.layers.BatchNormalization(name='batchnorm')
self.score_model = tf.keras.Sequential([
tf.keras.layers.Dense(64, activation='leaky_relu'),
tf.keras.layers.Dense(12, activation=activation)
])
self.task = tfrs.tasks.Ranking(
loss=self.loss,
metrics=[
tfr.keras.metrics.NDCGMetric(name="ndcg_metric")
]
)
def __call__(self, features, training=False):
feats = []
for feat, tens in features[0].items():
if feat in self.embeddings:
embedding = self.embedding_layers[feat](tens)
flatten = self.flatten(embedding)
feats.append(flatten)
if feat == 'continuous':
flatten = self.flatten(tens)
feats.append(flatten)
deep_concatenated = self.concatenate(feats)
batchnorm = self.batchnorm(deep_concatenated)
scores = self.score_model(batchnorm)
print("scores: ", scores)
print("mask: ", features[0]['mask'])
masked_scores = tf.boolean_mask(scores, features[0]['mask'])
# pred = tf.expand_dims(masked_scores, axis=1)
# return pred
return tf.expand_dims(masked_scores, axis=1)
def compute_loss(self, features, training=False):
labels = features[1]
# print("labels: ", labels)
# print("mask: ", features[0]['mask'])
masked_labels = tf.boolean_mask(labels, features[0]['mask'])
# print("masked labels:", masked_labels)
masked_labels = tf.expand_dims(masked_labels, axis=1)
print("masked_labels: ", masked_labels)
scores = self(features)
print("scores: ", scores)
print("loss: ", self.task(labels=masked_labels, predictions=scores))
return self.task(
labels=masked_labels,
predictions=scores
)
def train_step(self, inputs):
"""Custom train step using the `compute_loss` method."""
with tf.GradientTape() as tape:
loss = self.compute_loss(inputs)
# Handle regularization losses as well.
regularization_loss = sum(self.losses)
total_loss = loss + regularization_loss
gradients = tape.gradient(total_loss, self.trainable_variables)
self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
metrics = {metric.name: metric.result() for metric in self.metrics}
metrics["loss"] = loss
metrics["regularization_loss"] = regularization_loss
metrics["total_loss"] = total_loss
return metrics
def test_step(self, inputs):
"""Custom test step using the `compute_loss` method."""
loss = self.compute_loss(inputs)
# Handle regularization losses as well.
regularization_loss = sum(self.losses)
total_loss = loss + regularization_loss
metrics = {metric.name: metric.result() for metric in self.metrics}
metrics["loss"] = loss
metrics["regularization_loss"] = regularization_loss
metrics["total_loss"] = total_loss
return metrics
Can anybody see why I am not getting a loss value? Thanks a lot. Please let me know if you need additional info. Maybe I can create some synthetic data so you can run it all yourself. Been pulling my hair out for a few days trying to get this to work so any advice is MUCH appreicated.

" ValueError: Expecting KerasTensor which is from tf.keras.Input()". Error in prediction with dropout function

I am trying to predict uncertainty in a regression problem using Dropout during testing as per Yarin Gal's article. I created a class using Keras's backend function as provided by this stack overflow question's answer. The class takes a NN model as input and randomly drops neurons during testing to give a stochastic estimate rather than deterministic output for a time-series forecasting.
I create a simple encoder-decoder model as shown below for the forecasting with 0.1 dropout during training:
input_sequence = Input(shape=(lookback, train_x.shape[2]))
encoder = LSTM(128, return_sequences=False)(input_sequence)
r_vec = RepeatVector(forward_pred)(encoder)
decoder = LSTM(128, return_sequences=True, dropout=0.1)(r_vec) #maybe use dropout=0.1
output = TimeDistributed(Dense(train_y.shape[2], activation='linear'))(decoder)
# optimiser = optimizers.Adam(clipnorm=1)
enc_dec_model = Model(input_sequence, output)
enc_dec_model.compile(loss="mean_squared_error",
optimizer="adam",
metrics=['mean_squared_error'])
enc_dec_model.summary()
After that, I define and call the DropoutPrediction class.
# Define the class:
class KerasDropoutPrediction(object):
def __init__(self ,model):
self.f = K.function(
[model.layers[0].input,
K.learning_phase()],
[model.layers[-1].output])
def predict(self ,x, n_iter=10):
result = []
for _ in range(n_iter):
result.append(self.f([x , 1]))
result = np.array(result).reshape(n_iter ,x.shape[0] ,x.shape[1]).T
return result
# Call the object:
kdp = KerasDropoutPrediction(enc_dec_model)
y_pred_do = kdp.predict(x_test,n_iter=100)
y_pred_do_mean = y_pred_do.mean(axis=1)
However, in the line
kdp = KerasDropoutPrediction(enc_dec_model), when I call the LSTM model,
I got the following error message which says the input has to be a Keras Tensor. Can anyone help me with this error?
Error Message:
ValueError: Found unexpected instance while processing input tensors for keras functional model. Expecting KerasTensor which is from tf.keras.Input() or output from keras layer call(). Got: 0
To activate Dropout at inference time, you simply have to specify training=True (TF>2.0) in the layer of interest (in the last LSTM layer in your case)
with training=False
inp = Input(shape=(10, 1))
x = LSTM(1, dropout=0.3)(inp, training=False)
m = Model(inp,x)
# m.compile(...)
# m.fit(...)
X = np.random.uniform(0,1, (1,10,1))
output = []
for i in range(0,100):
output.append(m.predict(X)) # always the same
with training=True
inp = Input(shape=(10, 1))
x = LSTM(1, dropout=0.3)(inp, training=True)
m = Model(inp,x)
# m.compile(...)
# m.fit(...)
X = np.random.uniform(0,1, (1,10,1))
output = []
for i in range(0,100):
output.append(m.predict(X)) # always different
In your example, this becomes:
input_sequence = Input(shape=(lookback, train_x.shape[2]))
encoder = LSTM(128, return_sequences=False)(input_sequence)
r_vec = RepeatVector(forward_pred)(encoder)
decoder = LSTM(128, return_sequences=True, dropout=0.1)(r_vec, training=True)
output = TimeDistributed(Dense(train_y.shape[2], activation='linear'))(decoder)
enc_dec_model = Model(input_sequence, output)
enc_dec_model.compile(
loss="mean_squared_error",
optimizer="adam",
metrics=['mean_squared_error']
)
enc_dec_model.fit(train_x, train_y, epochs=10, batch_size=32)
and the KerasDropoutPrediction:
class KerasDropoutPrediction(object):
def __init__(self, model):
self.model = model
def predict(self, X, n_iter=10):
result = []
for _ in range(n_iter):
result.append(self.model.predict(X))
result = np.array(result)
return result
kdp = KerasDropoutPrediction(enc_dec_model)
y_pred_do = kdp.predict(test_x, n_iter=100)
y_pred_do_mean = y_pred_do.mean(axis=0)

How to multiply a layer by a constant vector element wise in Keras?

I want to make a weighted average ensemble of 3 of my trained models. So, I want first to multiply the softmax output of a model (element-wise) by a vector and then average the 3 weighted outputs of the 3 models.
I used the following code to multiply the output of the first model by its weight vector:
from keras.layers import Multiply, Average
resnet_weights = np.asarray([[0.91855, 0.99485, 0.89065, 0.96525, 0.98005,
0.93645, 0.6149, 0.934, 0.92505, 0.785, 0.85]], np.float32)
resnet_weight_tensor=tf.constant(resnet_weights, np.float32)
sess = tf.InteractiveSession()
print(resnet_weight_tensor.eval())
sess.close()
resnet_weighted = Multiply()([finetuned_model.layers[-1].output, resnet_weight_tensor])
print(resnet_weighted)
new_model=Model(model.input, resnet_weighted)
However, I'm stuck with the following error:
What can I do?
Use Lambda instead of Multiply, and K.constant instead of tf.constant (is backend-neutral):
resnet_weight_tensor=K.constant(resnet_weights, 'float32')
out = finetuned_model.layers[-1].output
resnet_weighted = Lambda(lambda x: x * resnet_weight_tensor)(out)
FULL EXAMPLE:
## BUILD MODELS
batch_size = 32
num_batches = 100
input_shape = (4,)
num_classes = 3
model_1 = make_model(input_shape, 8, num_classes)
model_2 = make_model(input_shape, 10, num_classes)
model_3 = make_model(input_shape, 12, num_classes)
## BUILD ENSEMBLE
models = (model_1, model_2, model_3)
models_ins = [model.input for model in models]
models_outs = [model.input for model in models]
outputs_weights = [np.random.random((batch_size, num_classes)),
np.random.random((batch_size, num_classes)),
np.random.random((batch_size, num_classes))]
outs_avg = model_outputs_average(models, outputs_weights)
final_out = Dense(num_classes, activation='softmax')(outs_avg)
model_ensemble = Model(inputs=models_ins, outputs=final_out)
model_ensemble.compile('adam', loss='categorical_crossentropy')
### TEST ENSEMBLE
x1 = np.random.randn(batch_size, *input_shape) # toy data
x2 = np.random.randn(batch_size, *input_shape)
x3 = np.random.randn(batch_size, *input_shape)
y = np.random.randint(0,2,(batch_size, num_classes)) # toy labels
model_ensemble.fit([x1,x2,x3], y)
Verify averaging:
[print(layer.name) for layer in model_ensemble.layers] # show layer names
preouts1 = get_layer_outputs(model_ensemble, 'lambda_1', [x1,x2,x3])
preouts2 = get_layer_outputs(model_ensemble, 'lambda_2', [x1,x2,x3])
preouts3 = get_layer_outputs(model_ensemble, 'lambda_3', [x1,x2,x3])
preouts_avg = get_layer_outputs(model_ensemble, 'average_1',[x1,x2,x3])
preouts = np.asarray([preouts1, preouts2, preouts3])
sum_of_diff_of_means = np.sum(np.mean(preouts, axis=0) - preouts_avg)
print(np.sum(np.mean([preouts1, preouts2, preouts3],axis=0) - preouts_avg))
# 4.69e-07
Functions used:
def make_model(input_shape, dense_dim, num_classes=3):
ipt = Input(shape=input_shape)
x = Dense(dense_dim, activation='relu')(ipt)
out = Dense(num_classes, activation='softmax')(x)
model = Model(ipt, out)
model.compile('adam', loss='categorical_crossentropy')
return model
def model_outputs_average(models, outputs_weights):
outs = [model.output for model in models]
out_shape = K.int_shape(outs[0])[1:] # ignore batch dim
assert all([(K.int_shape(out)[1:] == out_shape) for out in outs]), \
"All model output shapes must match"
outs_weights = [K.constant(w, 'float32') for w in outputs_weights]
ow_shape = K.int_shape(outs_weights[0])
assert all([(K.int_shape(w) == ow_shape) for w in outs_weights]), \
"All outputs_weights and model.output shapes must match"
weights_layers = [Lambda(lambda x: x * ow)(out) for ow, out
in zip(outs_weights, outs)]
return Average()(weights_layers)
def get_layer_outputs(model,layer_name,input_data,train_mode=False):
outputs = [layer.output for layer in model.layers if layer_name in layer.name]
layers_fn = K.function([model.input, K.learning_phase()], outputs)
return [layers_fn([input_data,int(train_mode)])][0][0]
The bug is possibly caused by the mixture of kears api and tensorflow api, since your resnet_weight_tensor is a tensor from tensorflow api, while finetuned_model.layers[-1].output is the output from a keras layer. Some discusses can be seen here issue 7362
One walk around is to wrap resnet_weight_tensor into keras Input layer.
from keras.layers import Multiply, Average, Input
resnet_weights = np.asarray([[0.91855, 0.99485, 0.89065, 0.96525, 0.98005,
0.93645, 0.6149, 0.934, 0.92505, 0.785, 0.85]], np.float32)
resnet_weight_tensor=tf.constant(resnet_weights, np.float32)
resnet_weight_input = Input(tensor=resnet_weight_tensor)
sess = tf.InteractiveSession()
print(resnet_weight_tensor.eval())
sess.close()
resnet_weighted = Multiply()([finetuned_model.layers[-1].output, resnet_weight_input])
print(resnet_weighted)
new_model=Model([model.input, resnet_weight_input], resnet_weighted)

How to apply a computed loss to a graph?

I am new to tensorflow and trying to code a toy discriminator problem. The way I have it set up, the loss is calculated from the expert_actions and the novice_actions. However, I am running an error when I am trying to optimize using the computed loss. The error is ValueError: No variables to optimize. I do understand that I am getting the error because there is no feed_dict. However, I do not know the solution to this.
class discriminator:
def __init__(self,n_actions, learning_rate):
self.n_actions = n_actions
self.learning_rate_dist = learning_rate
self.graph = tf.Graph()
with self.graph.as_default():
self.dis_input = tf.placeholder(tf.float32, [None, self.n_actions])
self.discriminator_function()
init = tf.global_variables_initializer()
self.sess = tf.Session(graph=self.graph)
self.sess.run(init)
def discriminator_function(self, hidden = None):
if hidden == None:
hidden = 16
x = tf.layers.dense(self.dis_input,hidden,tf.nn.relu)
x = tf.layers.dense(x,hidden,tf.nn.relu)
self.dis_output = tf.layers.dense(x,1)
def discriminator(self,expert_actions,novice_actions):
expert_out = self.sess.run(self.dis_output,feed_dict={self.dis_input : expert_actions})
novice_out = self.sess.run(self.dis_output,feed_dict={self.dis_input : novice_actions})
loss = tf.reduce_mean(tf.log(expert_out) + tf.log(1.-novice_out))
# update discriminator loss
optimize = tf.train.AdamOptimizer(self.learning_rate_dis).minimize(-loss)
self.sess.run(optimize) #error over here
return loss
if __name__ == '__main__':
d = discriminator(2,0.001)
expert_actions = np.random.randint(2, size=10)
novice_actions = np.random.randint(2, size=10)
d.discriminator(expert_actions,novice_actions)
You are trying to optimize loss = tf.reduce_mean(tf.log(expert_out) + tf.log(1.-novice_out)) with expert_out and novice_out being numpy arrays. There are no variables between the input and loss, to compute gradients.
Your discriminator function should be something like this:
def discriminator(self,expert_actions,novice_actions):
#Make sure you add the new ops and variables to the graph defined.
with self.graph.as_default():
loss = tf.reduce_mean(tf.log('Should be a tensor that is part of the graph and not a numpy array))
optimize = tf.train.AdamOptimizer(0.01).minimize(-loss)
self.sess.run(tf.global_variables_initializer())
#pass the inputs here
loss = self.sess.run([loss, optimize], feed_dict={self.dis_input : expert_actions})
return loss

How to generate a sample sentence with LSTM model in Tensorflow?

I'm working with the LSTM model in Tensorflow.
I already trained and saved the LSTM model. Now I'm coming up to the last task to generate the sentences.
Here is my pseudo code:
# We have already the run_epoch(session, m, data, eval_op, verbose=False) function with fee_dict like this:
feed_dict = {m.input_data: x,
m.targets: y,
m.initial_state: state}
...
# train and save model
...
# load saved model for generating task
new_sentence = [START_TOKEN]
# Here I want to generate a sentence until END_TOKEN is generated.
while new_sentence[-1] != END_TOKEN:
logits = get_logits(model, new_sentence)
# get argmax(logits) or sample(logits)
next_word = argmax(logits)
new_sentence.append(next_word)
print(new_sentence)
My question is:
When training, validating, or testing model I have to feed both of the inputs and their labels (by shifted inputs one) into model via feed_dict dictionary. But in the generating task, I have only one input which is the generating sentence new_sentence.
How can I build the right get_logits function or full generate function also?
when you train you have an output of the neural network, based on that output you calculate the error, based on error you create the optimizer to minimize the error.
In order to generate a new sentence you need to get just the output of the neural network(rnn).
Edited:
"""
Placeholders
"""
x = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder')
y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placeholder')
init_state = tf.zeros([batch_size, state_size])
"""
RNN Inputs
"""
# Turn our x placeholder into a list of one-hot tensors:
# rnn_inputs is a list of num_steps tensors with shape [batch_size, num_classes]
x_one_hot = tf.one_hot(x, num_classes)
rnn_inputs = tf.unpack(x_one_hot, axis=1)
"""
Definition of rnn_cell
This is very similar to the __call__ method on Tensorflow's BasicRNNCell. See:
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/rnn_cell.py
"""
with tf.variable_scope('rnn_cell'):
W = tf.get_variable('W', [num_classes + state_size, state_size])
b = tf.get_variable('b', [state_size], initializer=tf.constant_initializer(0.0))
def rnn_cell(rnn_input, state):
with tf.variable_scope('rnn_cell', reuse=True):
W = tf.get_variable('W', [num_classes + state_size, state_size])
b = tf.get_variable('b', [state_size], initializer=tf.constant_initializer(0.0))
return tf.tanh(tf.matmul(tf.concat(1, [rnn_input, state]), W) + b)
state = init_state
rnn_outputs = []
for rnn_input in rnn_inputs:
state = rnn_cell(rnn_input, state)
rnn_outputs.append(state)
final_state = rnn_outputs[-1]
#logits and predictions
with tf.variable_scope('softmax'):
W = tf.get_variable('W', [state_size, num_classes])
b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))
logits = [tf.matmul(rnn_output, W) + b for rnn_output in rnn_outputs]
predictions = [tf.nn.softmax(logit) for logit in logits]
# Turn our y placeholder into a list labels
y_as_list = [tf.squeeze(i, squeeze_dims=[1]) for i in tf.split(1, num_steps, y)]
#losses and train_step
losses = [tf.nn.sparse_softmax_cross_entropy_with_logits(logit,label) for \
logit, label in zip(logits, y_as_list)]
total_loss = tf.reduce_mean(losses)
train_step = tf.train.AdagradOptimizer(learning_rate).minimize(total_loss)
def train():
with tf.Session() as sess:
#load the model
training_losses = []
for idx, epoch in enumerate(gen_epochs(num_epochs, num_steps)):
training_loss = 0
training_state = np.zeros((batch_size, state_size))
if verbose:
print("\nEPOCH", idx)
for step, (X, Y) in enumerate(epoch):
tr_losses, training_loss_, training_state, _ = \
sess.run([losses,
total_loss,
final_state,
train_step],
feed_dict={x:X, y:Y, init_state:training_state})
training_loss += training_loss_
if step % 100 == 0 and step > 0:
if verbose:
print("Average loss at step", step,
"for last 250 steps:", training_loss/100)
training_losses.append(training_loss/100)
training_loss = 0
#save the model
def generate_seq():
with tf.Session() as sess:
#load the model
# load saved model for generating task
new_sentence = [START_TOKEN]
# Here I want to generate a sentence until END_TOKEN is generated.
while new_sentence[-1] != END_TOKEN:
logits = sess.run(final_state,{x:np.asarray([new_sentence])})
# get argmax(logits) or sample(logits)
next_word = argmax(logits[0])
new_sentence.append(next_word)
print(new_sentence)