How to use a pre-trained embedding matrix in tensorflow 2.0 RNN as initial weights in an embedding layer? - tensorflow

I'd like to use a pretrained GloVe embedding as the initial weights for an embedding layer in an RNN encoder/decoder. The code is in Tensorflow 2.0. Simply adding the embedding matrix as a weights = [embedding_matrix] parameter to the tf.keras.layers.Embedding layer won't do it because the encoder is an object and I'm not sure now to effectively pass the embedding_matrix to this object at training time.
My code closely follows the neural machine translation example in the Tensorflow 2.0 documentation. How would I add a pre-trained embedding matrix to the encoder in this example? The encoder is an object. When I get to training, the GloVe embedding matrix is unavailable to the Tensorflow graph. I get the error message:
RuntimeError: Cannot get value inside Tensorflow graph function.
The code uses the GradientTape method and teacher forcing in the training process.
I've tried modifying the encoder object to include the embedding_matrix at various points, including in the encoder's init, call and initialize_hidden_state. All of these fail. The other questions on stackoverflow and elsewhere are for Keras or older versions of Tensorflow, not Tensorflow 2.0.
class Encoder(tf.keras.Model):
def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
super(Encoder, self).__init__()
self.batch_sz = batch_sz
self.enc_units = enc_units
self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim, weights=[embedding_matrix])
self.gru = tf.keras.layers.GRU(self.enc_units,
return_sequences=True,
return_state=True,
recurrent_initializer='glorot_uniform')
def call(self, x, hidden):
x = self.embedding(x)
output, state = self.gru(x, initial_state = hidden)
return output, state
def initialize_hidden_state(self):
return tf.zeros((self.batch_sz, self.enc_units))
encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE)
# sample input
sample_hidden = encoder.initialize_hidden_state()
sample_output, sample_hidden = encoder(example_input_batch, sample_hidden)
print ('Encoder output shape: (batch size, sequence length, units) {}'.format(sample_output.shape))
print ('Encoder Hidden state shape: (batch size, units) {}'.format(sample_hidden.shape))
# ... Bahdanau Attention, Decoder layers, and train_step defined, see link to full tensorflow code above ...
# Relevant training code
EPOCHS = 10
training_record = pd.DataFrame(columns = ['epoch', 'training_loss', 'validation_loss', 'epoch_time'])
for epoch in range(EPOCHS):
template = 'Epoch {}/{}'
print(template.format(epoch +1,
EPOCHS))
start = time.time()
enc_hidden = encoder.initialize_hidden_state()
total_loss = 0
total_val_loss = 0
for (batch, (inp, targ)) in enumerate(dataset.take(steps_per_epoch)):
batch_loss = train_step(inp, targ, enc_hidden)
total_loss += batch_loss
if batch % 100 == 0:
template = 'batch {} ============== train_loss: {}'
print(template.format(batch +1,
round(batch_loss.numpy(),4)))

I was trying to do the same thing and getting the exact same error. The problem was that weights in the Embedding layer is currently deprecated. Changing weights= to embeddings_initializer= worked for me.
self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim,
embeddings_initializer=tf.keras.initializers.Constant(embedding_matrix),
trainable=False)

firslty : load pretrained embedding matrix using
def pretrained_embeddings(file_path, EMBEDDING_DIM, VOCAB_SIZE, word2idx):
# 1.load in pre-trained word vectors #feature vector for each word
print("graph in function",tf.get_default_graph())
print('Loading word vectors...')
word2vec = {}
with open(os.path.join(file_path+'.%sd.txt' % EMBEDDING_DIM), errors='ignore', encoding='utf8') as f:
# is just a space-separated text file in the format:
# word vec[0] vec[1] vec[2] ...
for line in f:
values = line.split()
word = values[0]
vec = np.asarray(values[1:], dtype='float32')
word2vec[word] = vec
print('Found %s word vectors.' % len(word2vec))
# 2.prepare embedding matrix
print('Filling pre-trained embeddings...')
num_words = VOCAB_SIZE
# initialization by zeros
embedding_matrix = np.zeros((num_words, EMBEDDING_DIM))
for word, i in word2idx.items():
if i < VOCAB_SIZE:
embedding_vector = word2vec.get(word)
if embedding_vector is not None:
# words not found in embedding index will be all zeros.
embedding_matrix[i] = embedding_vector
return embedding_matrix
2-then update Encoder class as following:
class Encoder(tf.keras.Model):
def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz,embedding_matrix):
super(Encoder, self).__init__()
self.batch_sz = batch_sz
self.enc_units = enc_units
self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim, weights=[embedding_matrix])
self.gru = tf.keras.layers.GRU(self.enc_units,
return_sequences=True,
return_state=True,
recurrent_initializer='glorot_uniform')
def call(self, x, hidden):
x = self.embedding(x)
output, state = self.gru(x, initial_state = hidden)
return output, state
def initialize_hidden_state(self):
return tf.zeros((self.batch_sz, self.enc_units))
3-calling function that loads pre-trained embedding to get embedding matrix
embedding_matrix = pretrained_embeddings(file_path, EMBEDDING_DIM,vocab_size, word2idx)
encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE,embedding_matrix)
# sample input
sample_hidden = encoder.initialize_hidden_state()
sample_output, sample_hidden = encoder(example_input_batch, sample_hidden)
print ('Encoder output shape: (batch size, sequence length, units) {}'.format(sample_output.shape))
print ('Encoder Hidden state shape: (batch size, units) {}'.format(sample_hidden.shape))
Note : this works on tensorflow 1.13.1 well

Related

How to handle target decoder inputs for self attention transformer model during predict()

My question is essentially a duplicate of this one, where I'm confused as to what to pass into the decoder during the predict() (i.e., call()) phase. I've modified tutorials found here and here in order to create this script. This is being used for the purposes of self-attention on a time series dataset for regression (not NLP).
There's too much boilerplate to provide the full model so I'll write in the pertinent script:
Transformer.py
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense
# The following imports are my custom Layers/Functions
from Utilities.MachineLearning.Keras.Layers.Encoder import Encoder
from Utilities.MachineLearning.Keras.Layers.Decoder import Decoder
from Utilities.MachineLearning.Keras.Functions.etc import create_padding_mask, create_look_ahead_mask
def create_masks(input, target):
# Encoder padding mask
encoder_mask = create_padding_mask(input)
# Used in the 2nd attention block in the decoder.
# This padding mask is used to mask the encoder outputs.
decoder_mask = create_padding_mask(input)
# Used in the 1st attention block in the decoder.
# It is used to pad and mask future tokens in the input received by
# the decoder.
look_ahead_mask = create_look_ahead_mask(tf.shape(target)[1])
target_mask = create_padding_mask(target)
encoder_decoder_mask = tf.maximum(target_mask, look_ahead_mask)
return encoder_mask, encoder_decoder_mask, decoder_mask
class Transformer(Model):
def __init__(
self,
num_inputs,
num_outputs=1,
num_heads=1,
num_layers=1,
num_embedding_inputs=None,
num_ff_inputs=None,
dropout=0,
):
super().__init__()
self.encoder = Encoder(
num_inputs,
num_heads,
num_layers,
num_embedding_inputs,
num_ff_inputs,
dropout,
)
self.decoder = Decoder(
num_inputs,
num_heads,
num_layers,
num_embedding_inputs,
num_ff_inputs,
dropout,
)
self.output_layer = Dense(num_outputs, name="Output")
def call(
self,
inputs,
targets,
training=None,
):
encoder_mask, encoder_decoder_mask, decoder_mask = create_masks(inputs, targets)
encoder_output = self.encoder(inputs, encoder_mask, training)
decoder_output, attention_weights = self.decoder(
targets, encoder_output, encoder_decoder_mask, decoder_mask, training
)
output = self.output_layer(decoder_output)
return output, attention_weights
train_step_signature = [
tf.TensorSpec(shape=(None, None), dtype=tf.int64),
tf.TensorSpec(shape=(None, None), dtype=tf.int64),
]
#tf.function(input_signature=train_step_signature)
def train_step(self, data):
x, y = data
with tf.GradientTape() as tape:
y_pred = self(x, y, training=True)
loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
# Compute gradients
trainable_vars = self.trainable_variables
gradients = tape.gradient(loss, trainable_vars)
# Update weights
self.optimizer.apply_gradients(zip(gradients, trainable_vars))
# Update metrics (includes the metric that tracks the loss)
self.compiled_metrics.update_state(y, y_pred)
# Return a dict mapping metric names to current value
return {m.name: m.result() for m in self.metrics}
SelfAttention.py
# Don't worry about what Custom is, it's basically a modified Keras Model
from Utilities.MachineLearning.Keras.Models.Custom import Custom
from Utilities.MachineLearning.Keras.Models.Transformer import Transformer
class SelfAttention(Custom):
def initialize(self):
self.transformer = Transformer(
self.batch_input_shape[-1],
num_heads=self.attention_units,
dropout=self.attention_dropout,
name="Transformer",
)
def call(self, inputs, training=False):
# TODO: What about `targets`?
return self.transformer(inputs, training=training)
There was no point in using a decoder as all the relevant information for time series data is used by the encoder block.

" ValueError: Expecting KerasTensor which is from tf.keras.Input()". Error in prediction with dropout function

I am trying to predict uncertainty in a regression problem using Dropout during testing as per Yarin Gal's article. I created a class using Keras's backend function as provided by this stack overflow question's answer. The class takes a NN model as input and randomly drops neurons during testing to give a stochastic estimate rather than deterministic output for a time-series forecasting.
I create a simple encoder-decoder model as shown below for the forecasting with 0.1 dropout during training:
input_sequence = Input(shape=(lookback, train_x.shape[2]))
encoder = LSTM(128, return_sequences=False)(input_sequence)
r_vec = RepeatVector(forward_pred)(encoder)
decoder = LSTM(128, return_sequences=True, dropout=0.1)(r_vec) #maybe use dropout=0.1
output = TimeDistributed(Dense(train_y.shape[2], activation='linear'))(decoder)
# optimiser = optimizers.Adam(clipnorm=1)
enc_dec_model = Model(input_sequence, output)
enc_dec_model.compile(loss="mean_squared_error",
optimizer="adam",
metrics=['mean_squared_error'])
enc_dec_model.summary()
After that, I define and call the DropoutPrediction class.
# Define the class:
class KerasDropoutPrediction(object):
def __init__(self ,model):
self.f = K.function(
[model.layers[0].input,
K.learning_phase()],
[model.layers[-1].output])
def predict(self ,x, n_iter=10):
result = []
for _ in range(n_iter):
result.append(self.f([x , 1]))
result = np.array(result).reshape(n_iter ,x.shape[0] ,x.shape[1]).T
return result
# Call the object:
kdp = KerasDropoutPrediction(enc_dec_model)
y_pred_do = kdp.predict(x_test,n_iter=100)
y_pred_do_mean = y_pred_do.mean(axis=1)
However, in the line
kdp = KerasDropoutPrediction(enc_dec_model), when I call the LSTM model,
I got the following error message which says the input has to be a Keras Tensor. Can anyone help me with this error?
Error Message:
ValueError: Found unexpected instance while processing input tensors for keras functional model. Expecting KerasTensor which is from tf.keras.Input() or output from keras layer call(). Got: 0
To activate Dropout at inference time, you simply have to specify training=True (TF>2.0) in the layer of interest (in the last LSTM layer in your case)
with training=False
inp = Input(shape=(10, 1))
x = LSTM(1, dropout=0.3)(inp, training=False)
m = Model(inp,x)
# m.compile(...)
# m.fit(...)
X = np.random.uniform(0,1, (1,10,1))
output = []
for i in range(0,100):
output.append(m.predict(X)) # always the same
with training=True
inp = Input(shape=(10, 1))
x = LSTM(1, dropout=0.3)(inp, training=True)
m = Model(inp,x)
# m.compile(...)
# m.fit(...)
X = np.random.uniform(0,1, (1,10,1))
output = []
for i in range(0,100):
output.append(m.predict(X)) # always different
In your example, this becomes:
input_sequence = Input(shape=(lookback, train_x.shape[2]))
encoder = LSTM(128, return_sequences=False)(input_sequence)
r_vec = RepeatVector(forward_pred)(encoder)
decoder = LSTM(128, return_sequences=True, dropout=0.1)(r_vec, training=True)
output = TimeDistributed(Dense(train_y.shape[2], activation='linear'))(decoder)
enc_dec_model = Model(input_sequence, output)
enc_dec_model.compile(
loss="mean_squared_error",
optimizer="adam",
metrics=['mean_squared_error']
)
enc_dec_model.fit(train_x, train_y, epochs=10, batch_size=32)
and the KerasDropoutPrediction:
class KerasDropoutPrediction(object):
def __init__(self, model):
self.model = model
def predict(self, X, n_iter=10):
result = []
for _ in range(n_iter):
result.append(self.model.predict(X))
result = np.array(result)
return result
kdp = KerasDropoutPrediction(enc_dec_model)
y_pred_do = kdp.predict(test_x, n_iter=100)
y_pred_do_mean = y_pred_do.mean(axis=0)

How to make a Keras Dense Layer deal with 3D tensor as input for this Softmax Fully Connected Layer?

I am working on a custom problem, and i have to change the fully connected layer (Dense with softmax), My model code is something like this (with Keras Framework):
.......
batch_size = 8
inputs = tf.random.uniform(shape=[batch_size,1024,256],dtype=tf.dtypes.float32)
preds = Dense(num_classes,activation='softmax')(x) #final layer with softmax activation
....
model = Model(inputs=base_model.input,outputs=preds)
So, i have to change the Code of Dense Layer to output a Tensor of probabilities with the shape of [batch_size, 1024, num_classes], without using a for loop, i need it to be optimized and not a consuming time function
The Dense code version that i want to change:
class Dense(Layer):
"""Just your regular densely-connected NN layer.
`Dense` implements the operation:
`output = activation(dot(input, kernel) + bias)`
where `activation` is the element-wise activation function
passed as the `activation` argument, `kernel` is a weights matrix
created by the layer, and `bias` is a bias vector created by the layer
(only applicable if `use_bias` is `True`).
Note: if the input to the layer has a rank greater than 2, then
it is flattened prior to the initial dot product with `kernel`.
# Example
```python
# as first layer in a sequential model:
model = Sequential()
model.add(Dense(32, input_shape=(16,)))
# now the model will take as input arrays of shape (*, 16)
# and output arrays of shape (*, 32)
# after the first layer, you don't need to specify
# the size of the input anymore:
model.add(Dense(32))
```
# Arguments
units: Positive integer, dimensionality of the output space.
activation: Activation function to use
(see [activations](../activations.md)).
If you don't specify anything, no activation is applied
(ie. "linear" activation: `a(x) = x`).
use_bias: Boolean, whether the layer uses a bias vector.
kernel_initializer: Initializer for the `kernel` weights matrix
(see [initializers](../initializers.md)).
bias_initializer: Initializer for the bias vector
(see [initializers](../initializers.md)).
kernel_regularizer: Regularizer function applied to
the `kernel` weights matrix
(see [regularizer](../regularizers.md)).
bias_regularizer: Regularizer function applied to the bias vector
(see [regularizer](../regularizers.md)).
activity_regularizer: Regularizer function applied to
the output of the layer (its "activation").
(see [regularizer](../regularizers.md)).
kernel_constraint: Constraint function applied to
the `kernel` weights matrix
(see [constraints](../constraints.md)).
bias_constraint: Constraint function applied to the bias vector
(see [constraints](../constraints.md)).
# Input shape
nD tensor with shape: `(batch_size, ..., input_dim)`.
The most common situation would be
a 2D input with shape `(batch_size, input_dim)`.
# Output shape
nD tensor with shape: `(batch_size, ..., units)`.
For instance, for a 2D input with shape `(batch_size, input_dim)`,
the output would have shape `(batch_size, units)`.
"""
def __init__(self, units,
activation=None,
use_bias=True,
kernel_initializer='glorot_uniform',
bias_initializer='zeros',
kernel_regularizer=None,
bias_regularizer=None,
activity_regularizer=None,
kernel_constraint=None,
bias_constraint=None,
**kwargs):
if 'input_shape' not in kwargs and 'input_dim' in kwargs:
kwargs['input_shape'] = (kwargs.pop('input_dim'),)
super(Dense, self).__init__(**kwargs)
self.units = units
self.activation = activations.get(activation)
self.use_bias = use_bias
self.kernel_initializer = initializers.get(kernel_initializer)
self.bias_initializer = initializers.get(bias_initializer)
self.kernel_regularizer = regularizers.get(kernel_regularizer)
self.bias_regularizer = regularizers.get(bias_regularizer)
self.activity_regularizer = regularizers.get(activity_regularizer)
self.kernel_constraint = constraints.get(kernel_constraint)
self.bias_constraint = constraints.get(bias_constraint)
self.input_spec = InputSpec(min_ndim=2)
self.supports_masking = True
def build(self, input_shape):
assert len(input_shape) >= 2
input_dim = input_shape[-1]
self.kernel = self.add_weight(shape=(input_dim, self.units),
initializer=self.kernel_initializer,
name='kernel',
regularizer=self.kernel_regularizer,
constraint=self.kernel_constraint)
if self.use_bias:
self.bias = self.add_weight(shape=(self.units,),
initializer=self.bias_initializer,
name='bias',
regularizer=self.bias_regularizer,
constraint=self.bias_constraint)
else:
self.bias = None
self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim})
self.built = True
def call(self, inputs):
output = K.dot(inputs, self.kernel)
if self.use_bias:
output = K.bias_add(output, self.bias)
if self.activation is not None:
output = self.activation(output)
return output
def compute_output_shape(self, input_shape):
assert input_shape and len(input_shape) >= 2
assert input_shape[-1]
output_shape = list(input_shape)
output_shape[-1] = self.units
return tuple(output_shape)
def get_config(self):
config = {
'units': self.units,
'activation': activations.serialize(self.activation),
'use_bias': self.use_bias,
'kernel_initializer': initializers.serialize(self.kernel_initializer),
'bias_initializer': initializers.serialize(self.bias_initializer),
'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
'bias_regularizer': regularizers.serialize(self.bias_regularizer),
'activity_regularizer': regularizers.serialize(self.activity_regularizer),
'kernel_constraint': constraints.serialize(self.kernel_constraint),
'bias_constraint': constraints.serialize(self.bias_constraint)
}
base_config = super(Dense, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
There are three different ways in which this can be done (that I can think of). If you want to have a single dense layer, that maps a vector of 256 elements to a vector of num_classes elements, and apply it all across your batch of data (that is, use the same 256 x num_classes matrix of weights for every sample), then you don't need to do anything special, just use a regular Dense layer:
import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense
batch_size = 8
num_classes = 10
inp = Input(shape=(1024, 256))
layer = Dense(num_classes, activation='softmax')
out = layer(inp)
print(out.shape)
# (None, 1024, 10)
print(layer.count_params())
# 2570
Another way would be to have a single huge Dense layer that takes all 1024 * 256 values in at the same time and produces all 1024 * num_classes values at the output, that is, a layer with a matrix of weights with shape (1024 * 256) x (1024 * num_classes) (in the order if gigabytes of memory!). This is easy to do too, although it seems unlikely to be what you need:
import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.layers import Flatten, Dense, Reshape, Softmax
batch_size = 8
num_classes = 10
inp = Input(shape=(1024, 256))
res = Flatten()(inp)
# This takes _a lot_ of memory!
layer = Dense(1024 * num_classes, activation=None)
out_res = layer(res)
# Apply softmax after reshaping
out_preact = Reshape((-1, num_classes))(out_res)
out = Softmax()(out_preact)
print(out.shape)
# (None, 1024, 10)
print(layer.count_params())
# 2684364800
Finally, you may want to have a set of 1024 weight matrices, each one applied to the corresponding sample in the input, which would imply an array of weights with shape (1024, 256, num_classes). I don't think this can be done with one of the standard Keras layers (or don't know how to)1, but it's easy enough to write a custom layer based on Dense to do that:
import tensorflow as tf
from tensorflow.keras.layers import Dense, InputSpec
class Dense2D(Dense):
def __init__(self, *args, **kwargs):
super(Dense2D, self).__init__(*args, **kwargs)
def build(self, input_shape):
assert len(input_shape) >= 3
input_dim1 = input_shape[-2]
input_dim2 = input_shape[-1]
self.kernel = self.add_weight(shape=(input_dim1, input_dim2, self.units),
initializer=self.kernel_initializer,
name='kernel',
regularizer=self.kernel_regularizer,
constraint=self.kernel_constraint)
if self.use_bias:
self.bias = self.add_weight(shape=(input_dim1, self.units),
initializer=self.bias_initializer,
name='bias',
regularizer=self.bias_regularizer,
constraint=self.bias_constraint)
else:
self.bias = None
self.input_spec = InputSpec(min_ndim=3, axes={-2: input_dim1, -1: input_dim2})
self.built = True
def call(self, inputs):
# Multiply each set of weights with each input element
output = tf.einsum('...ij,ijk->...ik', inputs, self.kernel)
if self.use_bias:
output += self.bias
if self.activation is not None:
output = self.activation(output)
return output
def compute_output_shape(self, input_shape):
assert input_shape and len(input_shape) >= 3
assert input_shape[-1]
output_shape = list(input_shape)
output_shape[-1] = self.units
return tuple(output_shape)
You would then use it like this:
import tensorflow as tf
from tensorflow.keras import Input
batch_size = 8
num_classes = 10
inp = Input(shape=(1024, 256))
layer = Dense2D(num_classes, activation='softmax')
out = layer(inp)
print(out.shape)
# (None, 1024, 10)
print(layer.count_params())
# 2631680
1: As today points out in the comments, you can actually use a LocallyConnected1D layer to do the same that I tried to do with my Dense2D layer. It is as simple as this:
import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.layers import LocallyConnected1D
batch_size = 8
num_classes = 10
inp = Input(shape=(1024, 256))
layer = LocallyConnected1D(num_classes, 1, activation='softmax')
out = layer(inp)
print(out.shape)
# (None, 1024, 10)
print(layer.count_params())
# 2631680

Are there any tools/libraries that can convert tensorflow lstm model to .mlmodel format to run in iOS app

I have a simple tensorflow model that consists of lstm layers - such as tf.contrib.rnn.LSTMBlockCell or tf.keras.layers.LSTM (I can provide the sample code also, if needed). I want to run the model on an iOS app. However, I have looked at several websites that say that presently there is no way to convert and run tensorflow model that consist LSTM layers on iOS apps.
I have tried these tools/libraries to convert the tensorflow model to .mlmodel format (or .tflite format)
1. Swift for Tensorflow
2. Tensorflow Lite for iOS
3. tfcoreml
However, these tools also does not seem to be able to convert the lstm layers model to .mlmodel format. These tools, however allow to use custom layers to be added. But I don't know how I can add LSTM custom layer.
Am I wrong in saying that there is no support to run tensorflow lstm model in iOS apps? If yes, then please guide me on how I can go ahead to include the model in iOS app. Is there any other tool/library that can be ued to convert it to .mlmodel format. If no, then are there any plans to include tensorflow support for iOS in future?
Model
import tensorflow as tf
from tensorflow.contrib import rnn
from tensorflow.contrib.rnn import *
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
#Summary parameters
logs_path = "logs/"
# Training Parameters
learning_rate = 0.001
training_steps = 1000
batch_size = 128
display_step = 200
# Network Parameters
num_input = 28 # MNIST data input (img shape: 28*28)
timesteps = 28 # timesteps
num_hidden = 128 # hidden layer num of features
num_classes = 10 # MNIST total classes (0-9 digits)
# tf Graph input
X = tf.placeholder("float", [None, timesteps, num_input])
Y = tf.placeholder("float", [None, num_classes])
# Define weights
weights = {
'out': tf.Variable(tf.random_normal([num_hidden, num_classes]))
}
biases = {
'out': tf.Variable(tf.random_normal([num_classes]))
}
def RNN(x, weights, biases):
# Unstack to get a list of 'timesteps' tensors of shape (batch_size, n_input)
x = tf.unstack(x, timesteps, 1)
# Define a lstm cell with tensorflow
lstm_cell = rnn.LSTMBlockCell(num_hidden, forget_bias = 1.0)
#lstm_cell = tf.keras.layers.LSTMCell(num_hidden, unit_forget_bias=True)
# Get lstm cell output
outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)
# Linear activation, using rnn inner loop last output
return tf.matmul(outputs[-1], weights['out']) + biases['out']
logits = RNN(X, weights, biases)
with tf.name_scope('Model'):
prediction = tf.nn.softmax(logits, name = "prediction_layer")
with tf.name_scope('Loss'):
# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y, name = "loss_layer"), name = "reduce_mean_loss")
with tf.name_scope('SGD'):
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate, name = "Gradient_Descent")
train_op = optimizer.minimize(loss_op, name = "minimize_layer")
with tf.name_scope('Accuracy'):
# Evaluate model (with test logits, for dropout to be disabled)
correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1), name = "correct_pred_layer")
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name = "reduce_mean_acc_layer")
# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()
#Create a summary to monitor cost tensor
tf.summary.scalar("loss", loss_op)
#Create a summary to monitor accuracy tensor
tf.summary.scalar("accuracy", accuracy)
#Merge all summaries into a single op
merged_summary_op = tf.summary.merge_all()
saver = tf.train.Saver()
save_path = ""
model_save = "model.ckpt"
# Start training
with tf.Session() as sess:
# Run the initializer
sess.run(init)
# op to write logs to Tensorboard
summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())
for step in range(1, training_steps+1):
total_batch = int(mnist.train.num_examples/batch_size)
batch_x, batch_y = mnist.train.next_batch(batch_size)
# Reshape data to get 28 seq of 28 elements
batch_x = batch_x.reshape((batch_size, timesteps, num_input))
# Run optimization op (backprop)
sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
if step % display_step == 0 or step == 1:
# Calculate batch loss and accuracy
loss, acc, summary = sess.run([loss_op, accuracy, merged_summary_op], feed_dict={X: batch_x,
Y: batch_y})
# Write logs at every iteration
summary_writer.add_summary(summary, step * total_batch)
print("Step " + str(step) + ", Minibatch Loss= " + \
"{:.4f}".format(loss) + ", Training Accuracy= " + \
"{:.3f}".format(acc))
saver.save(sess, model_save)
tf.train.write_graph(sess.graph_def, save_path, 'save_graph.pbtxt')
#print(sess.graph.get_operations())
print("Optimization Finished!")
print("Run the command line:\n" \
"--> tensorboard --logdir=logs/ " \
"\nThen open http://0.0.0.0:6006/ into your web browser")
# Calculate accuracy for 128 mnist test images
test_len = 128
test_data = mnist.test.images[:test_len].reshape((-1, timesteps, num_input))
test_label = mnist.test.labels[:test_len]
print("Testing Accuracy:", \
sess.run(accuracy, feed_dict={X: test_data, Y: test_label}))
Code to generate the frozen model
import tensorflow as tf
import numpy as np
from tensorflow.python.tools import freeze_graph
save_path = ""
model_name = "test_model_tf_keras_layers_lstm"
input_graph_path = save_path + "save_graph.pbtxt"
checkpoint_path = save_path + "model.ckpt"
input_saver_def_path = ""
input_binary = False
output_node_names = "Model/prediction_layer" #output node's name. Should match to that mentioned in the code
restore_op_name = 'save/restore_all'
filename_tensor_name = 'save/const:0'
output_frozen_graph_name = save_path + 'frozen_model' + '.pb' # name of .pb file that one would like to give
clear_devices = True
freeze_graph.freeze_graph(input_graph_path, input_saver_def_path, input_binary, checkpoint_path, output_node_names, restore_op_name, filename_tensor_name, output_frozen_graph_name, clear_devices, "")
print("Model Freezed")
Conversion Code to generate .mlmodel format file
import tfcoreml
coreml_model = tfcoreml.convert(tf_model_path = 'frozen_model_test_model_tf_keras_layers_lstm.pb',
mlmodel_path = 'test_model.mlmodel',
output_feature_names = ['Model/prediction_layer:0'],
add_custom_layers = True)
coreml_model.save("test_model.mlmodel")
Error message shown with
lstm_cell = rnn.BasicLSTMCell(num_hidden, name = "lstm_cell")
Value Error: Split op case not handled. Input shape = [1, 512], output shape = [1, 128]
Error message shown with
lstm_cell = rnn.LSTMBlockCell(num_hidden, name = "lstm_cell")
InvalidArgumentError (see above for traceback): No OpKernel was registered to support Op 'LSTMBlockCell' used by node rnn/lstm_cell/LSTMBlockCell (defined at /anaconda2/lib/python2.7/site-packages/tfcoreml/_tf_coreml_converter.py:153) with these attrs: [forget_bias=1, use_peephole=false, cell_clip=-1, T=DT_FLOAT]
Registered devices: [CPU]
Registered kernels:
<no registered kernels>
[[node rnn/lstm_cell/LSTMBlockCell (defined at /anaconda2/lib/python2.7/site-packages/tfcoreml/_tf_coreml_converter.py:153) ]]
I expect that the frozen tensorflow model can be converted to .mlmodel format.

Finetuning DNN with continuous outputs in the last layer

Greatly appreciate it if someone could help me out here:
I'm trying to do some finetuning on a regression task --- my inputs are 200X200 RGB images and my prediction output/label is a set of real values (let's say, within [0,10], though scaling is not a big deal here...?) --- on top of InceptionV3 architecture. Here are my functions that take a pretrained Inception model, remove the last layer and add a a new layer, set up for finetuning...
"""
Fine-tuning functions
"""
IM_WIDTH, IM_HEIGHT = 299, 299 #fixed size for InceptionV3
NB_EPOCHS = 3
BAT_SIZE = 32
FC_SIZE = 1024
NB_IV3_LAYERS_TO_FREEZE = 172
def eucl_dist(inputs):
x, y = inputs
return ((x - y)**2).sum(axis=-1)
def add_new_last_continuous_layer(base_model):
"""Add last layer to the convnet
Args:
base_model: keras model excluding top, for instance:
base_model = InceptionV3(weights='imagenet',include_top=False)
Returns:
new keras model with last layer
"""
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(FC_SIZE, activation='relu')(x)
predictions = Lambda(eucl_dist, output_shape=(1,))(x)
model = Model(input=base_model.input, output=predictions)
return model
def setup_to_finetune_continuous(model):
"""Freeze the bottom NB_IV3_LAYERS and retrain the remaining top
layers.
note: NB_IV3_LAYERS corresponds to the top 2 inception blocks in
the inceptionv3 architecture
Args:
model: keras model
"""
for layer in model.layers[:NB_IV3_LAYERS_TO_FREEZE]:
layer.trainable = False
for layer in model.layers[NB_IV3_LAYERS_TO_FREEZE:]:
layer.trainable = True
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
loss='eucl_dist')
Here are my implementations:
base_model = InceptionV3(weights = "imagenet",
include_top=False, input_shape=(3,200,200))
model0 = add_new_last_continuous_layer(base_model)
setup_to_finetune_continuous(model0)
history=model0.fit(train_x, train_y, validation_data = (test_x, test_y), nb_epoch=epochs, batch_size=32)
scores = model0.evaluate(test_x, test_y, verbose = 0)
features = model0.predict(X_train)
where train_x is a (168435, 3, 200, 200) numpy array and train_y is a (168435,) numpy array. The same goes for test_x and test_y except the number of observations is 42509.
I got the TypeError: Tensor object is not iterable bug which occurred at predictions = Lambda(eucl_dist, output_shape=(1,))(x)'' when going through theadd_new_last_continuous_layer()`` function. Could you anyone kindly give me some guidance to get around that and what the problem is? Greatly appreciated and happy holidays!
EDIT:
Changed the functions to:
def eucl_dist(inputs):
x, y = inputs
return ((x - y)**2).sum(axis=-1)
def add_new_last_continuous_layer(base_model):
"""Add last layer to the convnet
Args:
base_model: keras model excluding top, for instance:
base_model = InceptionV3(weights='imagenet',include_top=False)
Returns:
new keras model with last layer
"""
x = base_model.output
x = GlobalAveragePooling2D()(x)
x1 = Dense(FC_SIZE, activation='relu')(x)
x2 = Dense(FC_SIZE, activation='relu')(x)
predictions = Lambda(eucl_dist, output_shape=eucl_dist_shape)([x1,x2])
model = Model(input=base_model.input, output=predictions)
return model
Your output shape for the lambda layer is wrong. Define your functions like this:
from keras import backend as K
def euclidean_distance(vects):
x, y = vects
return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon()))
def eucl_dist_output_shape(shapes):
shape1, shape2 = shapes
return (shape1[0], 1)
predictions = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)([input1, input2])