Create Multi-Output Model with KerasNLP (CLS + MLM) - tensorflow

I am trying to use KerasNLP to pretrain a model through both Masked Language Modelling and a proxy for the CLS token that classifies certain labels. With just Masked Language Modelling it works fine, but when I try to make the model multi-output and also classify labels using the CLS token, it fails.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
!pip install -q keras-nlp
import keras_nlp
import numpy as np
# Create random token ids in [3, 100); ids 0, 1 and 2 are left unused here
# because they are listed as unselectable / special ids for the masker below.
x = np.random.randint(3, 100, size=(1000, 140))
# Prepend the id 2 to every sequence to act as the CLS token -> (1000, 141).
x_cls = np.hstack([np.full((1000, 1), 2), x])
# One random class in {0, 1, 2} for each sequence.
y = np.random.randint(0, 3, size=(1000, 1))
# The real problem delivers its data as a tf.data.Dataset, so use one here too.
# (This statement was truncated in the original paste; it is reconstructed
# from the shapes shown in the quoted error message: 141 tokens, 1 label.)
dataset = tf.data.Dataset.from_tensor_slices((x_cls, y)).batch(32)
# KerasNLP masker: id 0 is the mask token, ids 0-2 are never selected for
# masking, and exactly 15 mask slots are produced per sequence.
# NOTE(review): the first argument (99) is presumably the vocabulary size while
# the token ids above reach 99 — confirm it should not be 100.
masker = keras_nlp.layers.MLMMaskGenerator(
    99,
    0.15,
    mask_token_id=0,
    unselectable_token_ids=[0, 1, 2],
    mask_selection_length=15,
)
def process(input, y):
    """Mask one batch of token ids and bundle it with the class labels.

    Args:
        input: batch of token-id sequences handed to ``masker``.
        y: class labels that belong to the same batch.

    Returns:
        The 4-tuple ``(features, labels, weights, y)``: ``features`` is a dict
        with the masker's ``"tokens"`` and ``"mask_positions"`` outputs,
        ``labels`` is its ``"mask_ids"`` output and ``weights`` its
        ``"mask_weights"`` output.
    """
    outputs = masker(input)
    features = {
        "tokens": outputs["tokens"],
        "mask_positions": outputs["mask_positions"],
    }
    labels = outputs["mask_ids"]
    weights = outputs["mask_weights"]
    # NOTE(review): Keras accepts only x, (x,), (x, y) or (x, y, sample_weight).
    # This 4-tuple is exactly what the ValueError quoted after this snippet
    # rejects; for two outputs the targets and the weights would each have to
    # be grouped into a single element, e.g. (features, (labels, y), ...).
    return features, labels, weights, y
# Apply the masker to every batch of the dataset.
# (Truncated in the original paste; reconstructed from the surviving
# "x, y: process(x,y))" tail.)
dataset_mask = dataset.map(lambda x, y: process(x, y))
# Create a simple encoder, for testing purposes.
input = keras.Input(shape=(141,))
# NOTE(review): token ids go up to 99, so an Embedding with input_dim=99 looks
# one entry short — confirm.
embedding = layers.Embedding(99, 20)(input)
x = layers.Dense(20, activation="relu")(embedding)
encoder = keras.Model(inputs=input, outputs=x)
# MLM prediction + CLS prediction.
inputs = {
    "tokens": keras.Input(shape=(141,), dtype=tf.int32),
    "mask_positions": keras.Input(shape=(15,), dtype=tf.int32),
}
encoded_tokens = encoder(inputs["tokens"])
# NOTE(review): vocabulary_size=141 equals the sequence length rather than the
# number of distinct token ids — confirm this is intended.
outputs = keras_nlp.layers.MLMHead(vocabulary_size=141, activation="softmax")(
    encoded_tokens, inputs["mask_positions"]
)
# Use a Lambda layer to take the encoding at sequence position 0, which
# corresponds to the CLS token.
x = layers.Lambda(lambda x: x[:, 0, :])(encoded_tokens)
output_2 = layers.Dense(3, activation="softmax")(x)
pretraining_model = keras.Model(inputs, outputs=[outputs, output_2])
pretraining_model.compile(
    # One loss per output. The original list had no comma between the two
    # strings, which Python silently joins into a single (invalid) loss name.
    loss=["sparse_categorical_crossentropy", "sparse_categorical_crossentropy"],
    optimizer="Adam",
    weighted_metrics=["sparse_categorical_accuracy"],
    jit_compile=True,
)
The error I get is
ValueError: Data is expected to be in format `x`, `(x,)`, `(x, y)`, or `(x, y, sample_weight)`, found: ({'tokens': <tf.Tensor 'data_1:0' shape=(None, 141) dtype=int64>, 'mask_positions': <tf.Tensor 'data:0' shape=(None, 15) dtype=int64>}, <tf.Tensor 'data_2:0' shape=(None, 15) dtype=int64>, <tf.Tensor 'data_3:0' shape=(None, 15) dtype=float32>, <tf.Tensor 'data_4:0' shape=(None, 1) dtype=int64>)
Any help would be appreciated!


Tensor shapes for FFJORD bijector

I want to fit FFJORD bijector for transformation of two-dimensional dataset. The code is below (it is simplified version of my original code, but has the same problem).
import tensorflow as tf
import tensorflow_probability as tfp
tfb = tfp.bijectors
tfd = tfp.distributions
class ODE(tf.keras.layers.Layer):
    """Two-layer network: Dense(4, tanh) followed by Dense(2)."""

    def __init__(self):
        super().__init__()
        self.dense_layer1 = tf.keras.layers.Dense(4, activation='tanh')
        self.dense_layer2 = tf.keras.layers.Dense(2)

    def call(self, t, inputs):
        """Apply both dense layers to ``inputs``; ``t`` is accepted but unused."""
        hidden = self.dense_layer1(inputs)
        return self.dense_layer2(hidden)
# Use the ODE layer as the state derivative of the FFJORD bijector.
ode = ODE()
ffjord = tfb.FFJORD(state_time_derivative_fn=ode)
# Two-dimensional standard normal pushed through the bijector.
base_distr = tfd.MultivariateNormalDiag(loc=tf.zeros(2), scale_diag=tf.ones(2))
td = tfd.TransformedDistribution(distribution=base_distr, bijector=ffjord)
# Symbolic input with an unknown batch dimension: shape (None, 2).
x = tf.keras.Input(shape=(2,), dtype=tf.float32)
# This is the line reported to raise
# "Cannot convert a partially known TensorShape to a Tensor: (None, 2)".
log_prob = td.log_prob(x)
model = tf.keras.Model(x, log_prob)


def NLL(y, log_prob):
    """Negative log-likelihood loss; the target ``y`` is ignored."""
    return -log_prob


model.compile(optimizer=tf.optimizers.Adam(1.0e-2), loss=NLL)
# The call below was truncated in the original paste ("history = = X_train");
# restored as a fit call. X_train is not defined in this simplified snippet,
# and the zero targets are placeholders because NLL ignores them.
history = model.fit(
    x=X_train,
    y=np.zeros(X_train.shape[0]),
    epochs=100,
    verbose=0,
    batch_size=128,
)
I get error in line log_prob = td.log_prob(x): ValueError: Cannot convert a partially known TensorShape to a Tensor: (None, 2)
If I try to get a sample from transformed distribution td.sample(), it produces another error, but td.sample(1) works as well as some other calls, for example
# Call the ODE layer directly on a concrete tensor holding one 2-D point,
# with the time argument set to -1.0.
x = tf.constant([[2.0, 3.0]])
ode(-1.0, x)
I guess that there is some problem with shapes, but can't understand where it is.

different results in inference between python and c++ opencv Mat::

I'm building a re-identification network trained with a triplet-loss function, and up to that point everything is fine. The network works fine in Python: I implemented it in Keras with TensorFlow as the backend, then converted the .hd5 file to a .pb file to run inference in TensorFlow C++. The problem is that, for the same images, the results differ between Python and C++, and I don't know why. Can anyone help me?
Here is the model in Python:
import keras
import keras.applications
import keras.layers as layer
import tensorflow as tf
from keras import backend as K
from keras.backend.tensorflow_backend import set_session
from keras.models import Model as md
# TensorFlow 1.x session options: grow GPU memory on demand and log the device
# every op is placed on.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.log_device_placement = True
sess = tf.Session(config=config)
# Register the session with Keras. The original created the session but never
# passed it to the imported set_session, so Keras would not use this config.
set_session(sess)
class Model:
    """Triplet-loss re-identification network built from a shared CNN.

    ``feature_model`` maps one image to an L2-normalised embedding.
    ``triplet_model`` applies that same model to a target, a positive and a
    negative image and outputs three stacked Euclidean distances.
    """

    def __init__(self, shape):
        """Build both models for images of the given ``shape``.

        The original paste spelled this method ``init``, which Python never
        calls on construction; ``Model((32, 32, 3))`` needs ``__init__``.
        """
        self.shape = shape
        self.params = {
            'optimizer': 'sgd',
            'first_neuron': 12,
            'first_max_pooling': 2,
            'second_neuron': 12,
            'second_max_pooling': 2,
            'third_neuron': 20,
            'third_max_pooling': 3,
            'dense_neuron': 64,
            'final_neuron': 128,
        }
        # The feature model must exist first: create_model() reuses it.
        self.feature_model = self.create_features_model()
        self.triplet_model = self.create_model()

    def create_features_model(self):
        """Return the CNN that turns one image into a normalised embedding."""
        # Define the vision modules: three Conv2D(3x3, relu) + MaxPooling stages.
        img_input = layer.Input(shape=(self.shape))
        x = layer.Conv2D(self.params['first_neuron'], (3, 3), activation='relu')(img_input)
        x = layer.MaxPooling2D((self.params['first_max_pooling'], self.params['first_max_pooling']))(x)
        x = layer.Conv2D(self.params['second_neuron'], (3, 3), activation='relu')(x)
        x = layer.MaxPooling2D((self.params['second_max_pooling'], self.params['second_max_pooling']))(x)
        x = layer.Conv2D(self.params['third_neuron'], (3, 3), activation='relu')(x)
        x = layer.MaxPooling2D((self.params['third_max_pooling'], self.params['third_max_pooling']))(x)
        x = layer.Flatten()(x)
        x = layer.Dense(self.params['dense_neuron'], activation='relu')(x)
        x = layer.Dense(self.params['final_neuron'], activation='relu')(x)
        # L2-normalise each embedding along axis 1.
        out = layer.Lambda(lambda x: K.l2_normalize(x, axis=1), name='t_emb_1_lnorm')(x)
        features_model = md(img_input, out)
        return features_model

    def create_model(self):
        """Return the compiled three-input triplet model.

        The output stacks, along axis 1, the target-positive distance, the
        target-negative distance and the positive-negative distance.
        """
        base_model = self.feature_model
        # triplet framework, shared weights
        input_shape = (self.shape)
        input_target = layer.Input(shape=input_shape, name='input_target')
        input_positive = layer.Input(shape=input_shape, name='input_pos')
        input_negative = layer.Input(shape=input_shape, name='input_neg')
        net_target = base_model(input_target)
        net_positive = base_model(input_positive)
        net_negative = base_model(input_negative)
        # The Lambda layer produces its output using the given function; here
        # that function is the Euclidean distance.
        positive_distance = layer.Lambda(self.euclidean_distance, name='pos_dist')([net_target, net_positive])
        negative_distance = layer.Lambda(self.euclidean_distance, name='neg_dist')([net_target, net_negative])
        diference = layer.Lambda(self.euclidean_distance, name='dif')([net_positive, net_negative])
        # This Lambda layer simply stacks the outputs so that all distances are
        # available to the objective.
        distances = layer.Lambda(lambda vects: K.stack(vects, axis=1), name='distance')(
            [positive_distance, negative_distance, diference])
        model = md([input_target, input_positive, input_negative], distances, name='result')
        # Setting up an optimizer designed for a variable learning rate.
        model.compile(optimizer=keras.optimizers.Adam(lr=0.001, decay=0.00002),
                      loss=self.triplet_loss, metrics=[self.accuracy])
        return model

    def triplet_loss(self, _, y_pred):
        """Mean of max(0, d_pos^2 - 0.5 * (d_neg^2 + d_dif^2) + 0.5).

        ``y_pred[:, 0, 0]``, ``[:, 1, 0]`` and ``[:, 2, 0]`` are the stacked
        positive, negative and positive-negative distances. The first
        argument (the target) is ignored.
        """
        margin = K.constant(0.5)
        return K.mean(K.maximum(K.constant(0), K.square(y_pred[:, 0, 0]) - 0.5 * (
            K.square(y_pred[:, 1, 0]) + K.square(y_pred[:, 2, 0])) + margin))

    def accuracy(self, _, y_pred):
        """Fraction of samples whose positive distance is below the negative one."""
        return K.mean(y_pred[:, 0, 0] < y_pred[:, 1, 0])

    def lnorm(self, x):
        """L2-normalise ``x`` along its last axis."""
        return K.l2_normalize(x, axis=-1)

    def euclidean_distance(self, vects):
        """Euclidean distance between the two tensors in ``vects``.

        The squared sum is taken over axis 1 with ``keepdims=True`` and is
        floored at ``K.epsilon()`` before the square root.
        """
        x, y = vects
        return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon()))
This is how I run inference in Python:
from model import Model as model
from keras.utils import HDF5Matrix
import numpy as np
import cv2
from keras.backend.tensorflow_backend import set_session
import tensorflow as tf
# TensorFlow 1.x session options: grow GPU memory on demand and log the device
# every op is placed on.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.log_device_placement = True
sess = tf.Session(config=config)
# Register the session with Keras. The original created the session but never
# passed it to the imported set_session, so Keras would not use this config.
set_session(sess)
def load_datasets(in_h5_path, partition='train'):
    """Open the triplet datasets stored in an HDF5 file.

    Args:
        in_h5_path: path of the HDF5 file.
        partition: only ``'train'`` is handled by this function.

    Returns:
        The tuple ``(target, positive, negative)`` of ``HDF5Matrix`` objects
        for the datasets ``"targets"``, ``"positives"`` and ``"negatives"``.

    Raises:
        ValueError: if ``partition`` is not ``'train'``. The original printed
            the message and implicitly returned ``None``, which callers that
            unpack three values cannot use.
    """
    if partition != 'train':
        # NOTE(review): the message lists 'test' as valid, but no 'test'
        # branch is visible in this snippet.
        raise ValueError("Invalid 'partition' parameter: Valid values: ['train', 'test']")
    target = HDF5Matrix(datapath=in_h5_path, dataset="targets")
    positive = HDF5Matrix(datapath=in_h5_path, dataset="positives")
    negative = HDF5Matrix(datapath=in_h5_path, dataset="negatives")
    return target, positive, negative
# Read the two test crops from disk.
tar = cv2.imread("/home/amejia/PycharmProjects/triplet_loss/tra1.png")
nega = cv2.imread("/home/amejia/PycharmProjects/triplet_loss/dec1.png")
# Resize both crops to 32x32 with bicubic interpolation.
tar = cv2.resize(tar, (32, 32), interpolation=cv2.INTER_CUBIC)
nega = cv2.resize(nega, (32, 32), interpolation=cv2.INTER_CUBIC)
# Reshape each image into a batch of one: (1, 32, 32, 3).
# NOTE(review): no dtype conversion or scaling is applied here; confirm that
# the C++ path feeds the network values preprocessed in the same way.
t1 = np.array(tar).reshape((1, 32, 32, 3))
t2 = np.array(nega).reshape((1, 32, 32, 3))
# NOTE(review): these three datasets are loaded but not used in the lines below.
target, positive, negative = load_datasets('/home/amejia/PycharmProjects/lossDatasetGenerator/test/test32.h5')
net = model((32, 32, 3))
# Inputs in model order (target, positive, negative): t1 is passed as both the
# target and the negative, t2 as the positive.
enter = [t1, t2, t1]
a = net.triplet_model.predict(x=enter, batch_size=1)
The inference in C++:
In C++, this is how I run inference:
// Float input tensors of shape {1, image_size, image_size, 3}.
// The original paste had an unbalanced ")" after the shape initializer; the
// explicit tensorflow::TensorShape wrapper restores a well-formed constructor.
tensorflow::Tensor target(tensorflow::DT_FLOAT,
                          tensorflow::TensorShape({1, image_size, image_size, 3}));
tensorflow::Tensor positive(tensorflow::DT_FLOAT,
                            tensorflow::TensorShape({1, image_size, image_size, 3}));
// Fill both tensors from the track image and the detection crop.
img_to_float2(tracks, detections, target, positive, frame);
// Feed by placeholder name; the target tensor is reused as the negative input.
std::vector<std::pair<std::string, tensorflow::Tensor>> Input = {{"input_target:0", target},
                                                                 {"input_pos:0",    positive},
                                                                 {"input_neg:0",    target}};
std::vector<tensorflow::Tensor> Outputs;
// Fetch the output of the stacking layer named "distance".
// NOTE(review): Status is not checked before Outputs[0] is read.
tensorflow::Status Status = session->Run(Input, {"distance/stack:0"}, {}, &Outputs);
auto data = Outputs[0].flat<float>();
std::cout << Outputs[0].DebugString() << std::endl;
And this is the function that creates the input tensors:
// Fills tracksTensor with the resized track image and detectionsTensor with
// the resized detection crop taken from frame, both converted to CV_32FC3.
void LossModel::img_to_float2(Track &tracks, Detection &detections, tensorflow::Tensor &tracksTensor,
                              tensorflow::Tensor &detectionsTensor, cv::Mat &frame) {
    // Raw float pointers into the two tensors' buffers.
    auto *tar = tracksTensor.flat<float>().data();
    auto *dec = detectionsTensor.flat<float>().data();

    // Crop the detection box out of the frame and resize it.
    cv::Mat detectionImg = frame(detections.getBox()).clone();
    // NOTE(review): the interpolation argument was truncated in the original
    // paste; cv::INTER_CUBIC is used to match the Python script's
    // cv2.INTER_CUBIC — confirm.
    resize(detectionImg, detectionImg, cv::Size(FEATURES_IMG_SIZE, FEATURES_IMG_SIZE), 0, 0,
           cv::INTER_CUBIC);
    // cv::Mat header over the detection tensor's buffer; convertTo is expected
    // to write into it as long as size and type already match — verify.
    cv::Mat resizedImage(FEATURES_IMG_SIZE, FEATURES_IMG_SIZE, CV_32FC3, dec);
    detectionImg.convertTo(resizedImage, CV_32FC3);

    // Same steps for the image stored in the track.
    cv::Mat trackImg = tracks.get_img().clone();
    resize(trackImg, trackImg, cv::Size(FEATURES_IMG_SIZE, FEATURES_IMG_SIZE), 0, 0,
           cv::INTER_CUBIC);
    cv::Mat resizedImage2(FEATURES_IMG_SIZE, FEATURES_IMG_SIZE, CV_32FC3, tar);
    trackImg.convertTo(resizedImage2, CV_32FC3);
}

"Could not compute output" error using tf.keras merge layers in Tensorflow 2

I'm trying to use a merge layer in tf.keras but getting AssertionError: Could not compute output Tensor("concatenate_3/Identity:0", shape=(None, 10, 8), dtype=float32). Minimal (not)working example:
import tensorflow as tf
import numpy as np
# Size of the first non-batch axis shared by both inputs.
context_length = 10
# Two symbolic inputs with per-sample shape (context_length, 4).
input_a = tf.keras.layers.Input((context_length, 4))
input_b = tf.keras.layers.Input((context_length, 4))
#output = tf.keras.layers.concatenate([input_a, input_b]) # same error
# Merge the two inputs with a Concatenate layer using its default axis.
output = tf.keras.layers.Concatenate()([input_a, input_b])
model = tf.keras.Model(inputs = (input_a, input_b), outputs = output)
# Random float32 batches of 3 samples for each input.
a = np.random.rand(3, context_length, 4).astype(np.float32)
b = np.random.rand(3, context_length, 4).astype(np.float32)
# This call passes a and b as two separate positional arguments; it is the
# call reported to raise "Could not compute output".
pred = model(a, b)
I get the same error with other merge layers (e.g. add). I'm on TF2.0.0-alpha0 but get the same with 2.0.0-beta1 on colab.
Ok well the error message was not helpful but I eventually stumbled upon the solution: the input to model needs to be an iterable of tensors, i.e.
pred = model((a, b))
works just fine.
It fails because of the tf.keras.layers.Input. Tensorflow can't validate the shape of the layer thus it fails. This will work:
class MyModel(tf.keras.Model):
    """Subclassed model that concatenates two tensors and applies Dense(10)."""

    def __init__(self):
        super(MyModel, self).__init__()
        self.concat = tf.keras.layers.Concatenate()
        # You can also add the other layers
        self.dense_1 = tf.keras.layers.Dense(10)

    def call(self, a, b):
        """Concatenate ``a`` and ``b`` and return the dense layer's output."""
        out_concat = self.concat([a, b])
        out_dense = self.dense_1(out_concat)
        # The original never returned the result, so the model produced None.
        return out_dense


model = MyModel()
# Random float32 batches of shape (3, 5, 4).
a = np.random.rand(3, 5, 4).astype(np.float32)
b = np.random.rand(3, 5, 4).astype(np.float32)
output = model(a, b)

The two structures don't have the same nested structure while adding return_state=True over LSTM

I don't know if it is kind of bug or an error.
I have also reported this issue here.
What I am trying to do is make my custom LSTM stateful.
This code runs fine without return_state=True. Once I add it, the code raises this error: The two structures don't have the same nested structure.
This is a reproducible code:
from keras.layers import Lambda
import keras
import numpy as np
import tensorflow as tf
def rev_entropy(x):
    """Scale every row of ``x`` by log(clipped row sum) / (1 + row entropy).

    The row sum is clipped to ``[2, LATENT_SIZE]`` before the logarithm, and
    NaN entropies are replaced by zero. Uses the module-level ``LATENT_SIZE``
    and ``BATCH_SIZE``.
    """

    def row_entropy(row):
        # Entropy of the relative frequencies of the distinct values in row.
        counts = tf.cast(tf.unique_with_counts(row)[2], tf.float32)
        probs = tf.cast(counts / tf.reduce_sum(counts), tf.float32)
        return -tf.reduce_sum(probs * tf.log(probs))

    row_sums = tf.reduce_sum(x, axis=1)
    entropies = tf.map_fn(row_entropy, x)
    # Replace NaN entropies with zero before they enter the denominator.
    entropies = tf.cast(
        tf.where(tf.is_nan(entropies), tf.zeros_like(entropies), entropies),
        tf.float32,
    )
    log_clipped_sums = tf.log(tf.clip_by_value(row_sums, 2, LATENT_SIZE))
    scale = log_clipped_sums / (1 + entropies)
    # One scale factor per row, reshaped to a column for broadcasting.
    return x * tf.reshape(scale, [BATCH_SIZE, 1])
# SEQUENCE_LEN, EMBED_SIZE, VOCAB_SIZE and LATENT_SIZE are not defined in this
# snippet and must be set beforehand.
inputs = keras.layers.Input(shape=(SEQUENCE_LEN,), name="input")
embedding = keras.layers.Embedding(
    output_dim=EMBED_SIZE,
    input_dim=VOCAB_SIZE,
    input_length=SEQUENCE_LEN,
    trainable=True,
)(inputs)
# NOTE(review): with return_state=True the wrapped layer returns a list (the
# error quoted after this snippet shows 5 tensors), not a single tensor. The
# layers below expect one tensor, which is the failure the question is about.
encoded = keras.layers.Bidirectional(
    keras.layers.LSTM(LATENT_SIZE, return_state=True),
    merge_mode="sum",
    name="encoder_lstm",
)(embedding)
encoded = Lambda(rev_entropy)(encoded)
decoded = keras.layers.RepeatVector(SEQUENCE_LEN, name="repeater")(encoded)
decoded = keras.layers.Bidirectional(
    keras.layers.LSTM(EMBED_SIZE, return_sequences=True, return_state=True),
    merge_mode="sum",
    name="decoder_lstm",
)(decoded)
autoencoder = keras.models.Model(inputs, decoded)
autoencoder.compile(optimizer="sgd", loss='mse')
# Random integer inputs of shape (10, 45) and normal targets of shape (10, 45, 50).
x = np.random.randint(0, 90, size=(10, 45))
y = np.random.normal(size=(10, 45, 50))
# Truncated in the original paste ("history =, y, epochs=1)"); restored as a
# fit call on the autoencoder.
history = autoencoder.fit(x, y, epochs=1)
After applying the idea of the comment tf.map_fn(row_entropy, encoded,dtype=tf.float32), I received a new error:
ValueError: Layer repeater expects 1 inputs, but it received 5 input tensors. Input received: [<tf.Tensor 'encoder_lstm/add_16:0' shape=(?, 20) dtype=float32>, <tf.Tensor 'encoder_lstm/while/Exit_3:0' shape=(?, 20) dtype=float32>, <tf.Tensor 'encoder_lstm/while/Exit_4:0' shape=(?, 20) dtype=float32>, <tf.Tensor 'encoder_lstm/while_1/Exit_3:0' shape=(?, 20) dtype=float32>, <tf.Tensor 'encoder_lstm/while_1/Exit_4:0' shape=(?, 20) dtype=float32>]
Also, note that this error is raised even without that Lambda layer, so it seems there is something else wrong.
If I check encoded.shape, it says encoded is a list of length 5; however, it has to be a tensor of shape (batch_size, latent_size)!
Everything is fine without adding return_state=True.
Any help is appreciated!

How to calculate the derivative values of multi-input models in Keras with the TensorFlow backend

My question is: I want to calculate the derivatives with respect to "time input" and "dense_input". Before asking this question, I searched for a solution for calculating the Jacobian matrix with a Keras function.
After running it, I got this error:
File "\keras\backend\", line 2614, in _call
AttributeError: 'list' object has no attribute 'dtype'
Here is my simple version:
from keras.models import *
from keras.layers import *
import keras.backend as K
import pandas as pd
from keras import optimizers
def get_model(timestamp, features):
    """Build the two-input model: an LSTM branch merged with a dense input.

    Args:
        timestamp: number of time steps of the ``time_input`` sequences.
        features: number of features per time step.

    Returns:
        A Keras ``Model`` with inputs ``[time_input, dense_input_layer]`` and
        the single-unit output layer ``final_output``.
    """
    time_input = Input(shape=(timestamp, features,), name='time_input')
    lstm_out = LSTM(4)(time_input)
    dense_hidden_units = 2
    # dense_length is not a parameter; it is read from module scope and must
    # be defined before this function is called.
    dense_input_layer = Input(shape=(dense_length,), name='dense_input_layer')
    final_input_layer = concatenate([lstm_out, dense_input_layer])
    # Disable biases in the hidden layer
    dense_1 = Dense(units=dense_hidden_units, use_bias=False, activation='sigmoid')(final_input_layer)
    # Disable bias in output layer
    output_layer = Dense(units=1, use_bias=False, name='final_output')(dense_1)
    # The Model(...) call was truncated in the original paste; the outputs
    # argument and the closing parenthesis are restored here.
    model = Model(
        inputs=[time_input, dense_input_layer],
        outputs=output_layer
    )
    return model
if __name__ == '__main__':
    timestamp = 3
    features = 1
    dense_length = 3
    # One row of six values. Reshaped below, it yields two samples, which
    # matches the two targets. (The closing brackets of this literal were lost
    # in the original paste.)
    temp_data = pd.DataFrame([
        [1, 2, 3, 2, 3, 4],
    ])
    time_data = temp_data.values.reshape(-1, timestamp, features)
    dense_data = temp_data.values.reshape(-1, dense_length)
    target_data = np.array([1, 2])
    model = get_model(
        timestamp, features
    )
    Ada = optimizers.Adagrad(lr=0.09, epsilon=1e-04)
    model.compile(loss='mse', optimizer=Ada, metrics=['mse'])
    # The fit call was truncated in the original paste; it is reconstructed
    # from the surviving input/target entries and keyword arguments.
    model.fit(
        {
            'time_input': time_data,
            'dense_input_layer': dense_data,
        },
        {
            'final_output': target_data
        },
        epochs=1, batch_size=1
    )
    time_input = model.get_layer('time_input').input
    GPP_input_layer = model.get_layer('dense_input_layer').input
    # Gradients of the model output with respect to both input tensors.
    J = K.gradients(model.output, [time_input, GPP_input_layer])
    # NOTE(review): the nested list [[time_input, GPP_input_layer], ...] is
    # kept as posted; it is the "extra set of brackets" the answer points to.
    jacobianTime = K.function([[time_input, GPP_input_layer], K.learning_phase()], J)
    deriRes = jacobianTime([time_data, dense_data])  # this line throw exception
Thanks for help!
You have an extra set of brackets.
jacobianTime = K.function([[time_input, GPP_input_layer], K.learning_phase()], J)
jacobianTime = K.function([time_input, GPP_input_layer, K.learning_phase()], J)
I was able to run your code like this at least.