Testing a quantized TFLite model with multiple feature inputs - tensorflow

I am trying to test a quantized SqueezeNet model with multiple inputs. I have implemented my prediction code like this:
interpreter = tf.lite.Interpreter(model_path="SqueezeNetwide_Quant_model.tflite")
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
# Test model on some input data.
input_shape = input_details[0]['shape']
input_shape1 = input_details[1]['shape']
input_shape2 = input_details[2]['shape']
input_shape3 = input_details[3]['shape']
acc = 0
for i in range(len(x_test)):
    input_data = np.array(x_test_hog_vector[i].reshape(input_shape), dtype=np.float32)
    interpreter.set_tensor(input_details[0]['index'], input_data)
    input_data1 = np.array(x_test[i].reshape(input_shape1), dtype=np.float32)
    interpreter.set_tensor(input_details[1]['index'], input_data1)
    input_data2 = np.array(x_test_mean[i].reshape(input_shape2), dtype=np.float32)
    interpreter.set_tensor(input_details[2]['index'], input_data2)
    input_data3 = np.array(x_test_invariant_Red[i].reshape(input_shape3), dtype=np.float32)
    interpreter.set_tensor(input_details[3]['index'], input_data3)
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details[0]['index'])
    results = np.squeeze(output_data)
    if np.argmax(results) == np.argmax(y_test[i]):
        acc += 1
acc = acc / len(x_test)
print(acc * 100)
This approach works only for the standard single-input, single-output case.
Here, however, I would like to pass traditional features (mean value, HOG, LBP, and so on) into the deep learning model for my research.
With multiple inputs I cannot reach the expected accuracy: I observe around 15%, whereas without quantization I saw 77%+.
(The quantization itself is not the problem; the single-input quantized model works as I want.)
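Two things worth checking with multi-input quantized models (these are assumptions on my part, not a confirmed diagnosis): the order of get_input_details() is not guaranteed to match the input order of the original Keras model, and if the converted model has integer input tensors, float data has to be quantized with each input's scale and zero point before calling set_tensor. A minimal diagnostic sketch:
import numpy as np
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path="SqueezeNetwide_Quant_model.tflite")
interpreter.allocate_tensors()

for d in interpreter.get_input_details():
    # Match each entry to the right feature by name, not by position.
    print(d['name'], d['shape'], d['dtype'], d['quantization'])

def prepare(x, detail):
    # Reshape and, if the input tensor is integer-quantized, apply its
    # scale/zero_point before handing it to set_tensor.
    x = np.asarray(x, dtype=np.float32).reshape(detail['shape'])
    scale, zero_point = detail['quantization']
    if detail['dtype'] in (np.int8, np.uint8) and scale != 0:
        x = np.round(x / scale + zero_point).astype(detail['dtype'])
    return x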

Related

Completely different results using Tensorflow and Pytorch for MobilenetV3 Small

I am using transfer learning from MobileNetV3 Small to predict 5 different points on an image. I am doing this as a regression task.
For both models:
Setting the last 50 layers trainable and adding the same fully connected layers to the end.
Learning rate 3e-2
Batch size 32
Adam optimizer with the same betas
100 epochs
The inputs consist of RGB unscaled images
Pytorch
Model
def _init_weights(m):
    if type(m) == nn.Linear:
        nn.init.xavier_uniform_(m.weight)
        m.bias.data.fill_(0.01)

def get_mob_v3_small():
    model = torchvision.models.mobilenet_v3_small(pretrained=True)
    children_list = get_children(model)
    for c in children_list[:-50]:
        for p in c.parameters():
            p.requires_grad = False
    return model

class TransferMobileNetV3_v2(nn.Module):
    def __init__(self, num_keypoints: int = 5):
        super(TransferMobileNetV3_v2, self).__init__()
        self.classifier_neurons = num_keypoints * 2
        self.base_model = get_mob_v3_small()
        self.base_model.classifier = nn.Sequential(
            nn.Linear(in_features=1024, out_features=1024),
            nn.ReLU(),
            nn.Linear(in_features=1024, out_features=512),
            nn.ReLU(),
            nn.Linear(in_features=512, out_features=self.classifier_neurons)
        )
        self.base_model.apply(_init_weights)

    def forward(self, x):
        out = self.base_model(x)
        return out
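The helper get_children is called in get_mob_v3_small but not shown in the question; a minimal sketch of a helper that recursively flattens a model into its leaf modules (an assumption about what the original does):
import torch.nn as nn

def get_children(model: nn.Module):
    # Recursively collect leaf modules so individual blocks can be frozen.
    children = list(model.children())
    if not children:
        return [model]
    flat = []
    for child in children:
        flat.extend(get_children(child))
    return flat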
Training Script
def train(net, trainloader, testloader, train_loss_fn, optimizer, scaler, args):
    len_dataloader = len(trainloader)
    for epoch in range(1, args.epochs + 1):
        net.train()
        for batch_idx, sample in enumerate(trainloader):
            inputs, labels = sample
            inputs, labels = inputs.to(args.device), labels.to(args.device)
            optimizer.zero_grad()
            with torch.cuda.amp.autocast(args.use_amp):
                prediction = net(inputs)
                loss = train_loss_fn(prediction, labels)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

def main():
    args = make_args_parser()
    args.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    seed = args.seed
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(net.parameters(), lr=3e-2, betas=(0.9, 0.999))
    scaler = torch.cuda.amp.GradScaler(enabled=args.use_amp)
    train(net, train_loader, test_loader, loss_fn, optimizer, scaler, args)
Tensorflow
Model
base_model = tf.keras.applications.MobileNetV3Small(weights='imagenet',
                                                    input_shape=(224, 224, 3))
x_in = base_model.layers[-6].output
x = Dense(units=1024, activation="relu")(x_in)
x = Dense(units=512, activation="relu")(x)
x = Dense(units=10, activation="linear")(x)
model = Model(inputs=base_model.input, outputs=x)
for layer in model.layers[:-50]:
    layer.trainable = False
Training Script
model.compile(loss="mse",
              optimizer=tf.keras.optimizers.Adam(learning_rate=3e-2))
history = model.fit(input_numpy, output_numpy,
                    verbose=1,
                    batch_size=32, epochs=100, validation_split=0.2)
Results
The PyTorch model predicts one single point around the center for all 5 different points.
The Tensorflow model predicts the points quite well and they are quite accurate.
The loss in the Pytorch model is much higher than the Tensorflow model.
Please do let me know what is going wrong, as I am trying my best to shift to PyTorch for this work and I need this model to give me similar/identical results.
Note: I also noticed that the MobileNetV3 Small architecture seems to differ between PyTorch and Tensorflow. I do not know if I am interpreting it wrong, but I'm putting it here just in case.
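One difference worth checking (my assumption, not something stated in the question): tf.keras.applications.MobileNetV3Small includes its own input rescaling inside the model, while the torchvision weights expect ImageNet-normalized tensors, so feeding unscaled RGB images to both frameworks is not equivalent. A sketch of the normalization the torchvision model expects:
from torchvision import transforms

# ImageNet statistics used for the torchvision MobileNetV3 pretrained weights.
preprocess = transforms.Compose([
    transforms.ToTensor(),  # uint8 HWC in [0, 255] -> float CHW in [0, 1]
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])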

How to merge ReLU after quantization aware training

I have a network which contains Conv2D layers followed by ReLU activations, declared as such:
x = layers.Conv2D(self.hparams['channels_count'], kernel_size=(4,1))(x)
x = layers.ReLU()(x)
And it is ported to TFLite with the following representation:
(Image: Basic TFLite network without Q-aware training)
However, after performing quantization-aware training on the network and porting it again, the ReLU layers are now explicit in the graph:
(Image: TFLite network after Q-aware training)
This results in them being processed separately on the target instead of during the evaluation of the Conv2D kernel, inducing a 10% performance loss in my overall network.
Declaring the activation with the following implicit syntax does not produce the problem:
x = layers.Conv2D(self.hparams['channels_count'], kernel_size=(4,1), activation='relu')(x)
(Image: Basic TFLite network with implicit ReLU activation)
(Image: TFLite network with implicit ReLU after Q-aware training)
However, this restricts the network to basic ReLU activation, whereas I would like to use ReLU6 which cannot be declared in this way.
Is this a TFLite issue? If not, is there a way to prevent the ReLU layer from being split? Or alternatively, is there a way to manually merge the ReLU layers back into the Conv2D layers after the quantization-aware training?
Edit:
QA training code:
def learn_qaware(self):
    quantize_model = tfmot.quantization.keras.quantize_model
    self.model = quantize_model(self.model)
    training_generator = SCDataGenerator(self.training_set)
    validate_generator = SCDataGenerator(self.validate_set)
    self.model.compile(
        optimizer=self.configure_optimizers(qa_learn=True),
        loss=self.get_LLP_loss(),
        metrics=self.get_metrics(),
        run_eagerly=config['eager_mode'],
    )
    self.model.fit(
        training_generator,
        epochs=self.hparams['max_epochs'],
        batch_size=1,
        shuffle=self.hparams['shuffle_curves'],
        validation_data=validate_generator,
        callbacks=self.get_callbacks(qa_learn=True),
    )
Quantized TFLite model generation code:
def tflite_convert(classifier):
    output_file = get_tflite_filename(classifier.model_path)
    # Convert the model to the TensorFlow Lite format without quantization
    saved_shape = classifier.model.input.shape.as_list()
    fixed_shape = saved_shape
    fixed_shape[0] = 1
    classifier.model.input.set_shape(fixed_shape)  # Force batch size to 1 for generation
    converter = tf.lite.TFLiteConverter.from_keras_model(classifier.model)
    classifier.model.input.set_shape(saved_shape)
    # Set the optimization flag.
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    # Enforce integer-only quantization
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.inference_input_type = tf.int8
    converter.inference_output_type = tf.int8
    # Provide a representative dataset to ensure we quantize correctly.
    if config['eager_mode']:
        tf.executing_eagerly()
    def representative_dataset():
        for x in classifier.validate_set.get_all_inputs():
            rs = x.reshape(1, x.shape[0], 1, 1).astype(np.float32)
            yield [rs]
    converter.representative_dataset = representative_dataset
    model_tflite = converter.convert()
    # Save the model to disk
    open(output_file, "wb").write(model_tflite)
    return TFLite_model(output_file)
I have found a workaround which works by instantiating a non-trained version of the model, then copying over the weights from the quantization aware trained model before converting to TFLite.
This seems like quite a hack, so I'm still on the lookout for a cleaner solution.
Code for the workaround:
def dequantize(self):
    if not hasattr(self, 'fp_model') or not self.fp_model:
        self.fp_model = self.get_default_model()

    def find_layer_in_model(name, model):
        for layer in model.layers:
            if layer.name == name:
                return layer
        return None

    def find_weight_group_in_layer(name, layer):
        # Search the given layer's trainable weights for a matching name.
        for weight_group in layer.trainable_weights:
            if weight_group.name == name:
                return weight_group
        return None

    for layer in self.fp_model.layers:
        if 'input' in layer.name or 'quantize_layer' in layer.name:
            continue
        QUANT_TAG = "quant_"
        quant_layer = find_layer_in_model(QUANT_TAG + layer.name, self.model)
        if quant_layer is None:
            raise RuntimeError('Failed to match layer ' + layer.name)
        for i, weight_group in enumerate(layer.trainable_weights):
            quant_weight_group = find_weight_group_in_layer(QUANT_TAG + weight_group.name, quant_layer)
            if quant_weight_group is None:
                quant_weight_group = find_weight_group_in_layer(weight_group.name, quant_layer)
                if quant_weight_group is None:
                    raise RuntimeError('Failed to match weight group ' + weight_group.name)
            layer.trainable_weights[i].assign(quant_weight_group)
    self.model = self.fp_model
You can pass activation=tf.nn.relu6 to use ReLU6 activation.
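Since the activation argument also accepts a callable, ReLU6 can be declared implicitly the same way as 'relu', which should keep it fused with the Conv2D after conversion (a sketch following the answer above):
# Implicit ReLU6, declared via a callable instead of the string 'relu'.
x = layers.Conv2D(self.hparams['channels_count'], kernel_size=(4, 1),
                  activation=tf.nn.relu6)(x)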

tf.keras.backend.function for transforming embeddings inside tf.data.dataset

I am trying to use the output of a neural network to transform data inside tf.data.dataset. Specifically, I am using a Delta-Encoder to manipulate embeddings inside the tf.data pipeline. In so doing, however, I get the following error:
OperatorNotAllowedInGraphError: iterating over `tf.Tensor` is not allowed in Graph execution. Use Eager execution or decorate this function with @tf.function.
I have searched the dataset pipeline page and stack overflow, but I could not find something that addresses my question. In the code below I am using an Autoencoder, as it yields an identical error with more concise code.
The offending part seems to be
[[x,]] = tf.py_function(Auto_Func, [x], [tf.float32])
inside
tf_auto_transform.
num_embeddings = 100
input_dims = 1000
embeddings = np.random.normal(size=(num_embeddings, input_dims)).astype(np.float32)
target = np.zeros(num_embeddings)

# creating Autoencoder
inp = Input(shape=(input_dims,), name='input')
hidden = Dense(10, activation='relu', name='hidden')(inp)
out = Dense(input_dims, activation='relu', name='output')(hidden)
auto_encoder = tf.keras.models.Model(inputs=inp, outputs=out)

Auto_Func = tf.keras.backend.function(inputs=auto_encoder.get_layer(name='input').input,
                                      outputs=auto_encoder.get_layer(name='output').input)

# Autoencoder transform for dataset.map
def tf_auto_transform(x, target):
    x_shape = x.shape
    # @tf.function
    # def func(x):
    #     return tf.py_function(Auto_Func, [x], [tf.float32])
    # [[x,]] = func(x)
    [[x,]] = tf.py_function(Auto_Func, [x], [tf.float32])
    x.set_shape(x_shape)
    return x, target

def get_dataset(X, y, batch_size=32):
    train_ds = tf.data.Dataset.from_tensor_slices((X, y))
    train_ds = train_ds.map(tf_auto_transform)
    train_ds = train_ds.batch(batch_size)
    return train_ds

dataset = get_dataset(embeddings, target, 2)
The above code yields the following error:
OperatorNotAllowedInGraphError: iterating over `tf.Tensor` is not allowed in Graph execution. Use Eager execution or decorate this function with @tf.function.
I tried to eliminate the error by running the commented out section of the tf_auto_transform function, but the error persisted.
SideNote: While it is true that the Delta encoder paper has code, it is written in tf 1.x. I am trying to use tf 2.x with the tf functional API instead. Thank you for your help!
At the risk of outing myself as a n00b, the answer is to switch the order of the map and batch functions. I am trying to apply a neural network to make some changes to the data. tf.keras models take batches as input, not individual samples. By batching the data first, I can run batches through my nn.
def get_dataset(X, y, batch_size=32):
    train_ds = tf.data.Dataset.from_tensor_slices((X, y))
    # The changed order: batch first, then map
    train_ds = train_ds.batch(batch_size)
    train_ds = train_ds.map(tf_auto_transform)
    return train_ds
It really is that simple.
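A quick sanity check of the corrected batch-then-map ordering, reusing the names from the question (a usage sketch; iterating one batch eagerly surfaces any remaining shape or graph-mode issues immediately):
# Build the dataset with the new order and inspect one batch.
dataset = get_dataset(embeddings, target, batch_size=2)
for x_batch, y_batch in dataset.take(1):
    print(x_batch.shape, y_batch.shape)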

How to train any Hugging face transformer model (eg DistilBERT) for question answer from scratch using Tensorflow backend?

I want to understand how to train a Hugging Face transformer model (like BERT, DistilBERT, etc.) for a question-answering system with TensorFlow as the backend. The following is the logic I am currently using (but I am not sure whether it is the right approach):
I am using the SQuAD v1.1 dataset.
In the SQuAD dataset the answer to any question is always present in the context, so to put it in simple words I am trying to predict the start index and the end index, and hence the answer.
I have transformed the dataset for this purpose: after tokenization I added the start and end index at the word level. Here is how my dataset looks:
Next I encode the question and context as per the Hugging Face docs and return input_ids, attention_mask and token_type_ids, which will be used as inputs to the model.
def tokenize(questions, contexts):
    input_ids, input_masks, input_segments = [], [], []
    for question, context in tqdm_notebook(zip(questions, contexts)):
        inputs = tokenizer.encode_plus(question, context, add_special_tokens=True,
                                       max_length=512, pad_to_max_length=True,
                                       return_attention_mask=True, return_token_type_ids=True)
        input_ids.append(inputs['input_ids'])
        input_masks.append(inputs['attention_mask'])
        input_segments.append(inputs['token_type_ids'])
    return [np.asarray(input_ids, dtype='int32'),
            np.asarray(input_masks, dtype='int32'),
            np.asarray(input_segments, dtype='int32')]
Finally, I define a Keras model which takes these three inputs and predicts two values: the start and end word index of the answer in the given context.
input_ids_in = tf.keras.layers.Input(shape=(512,), name='input_token', dtype='int32')
input_masks_in = tf.keras.layers.Input(shape=(512,), name='masked_token', dtype='int32')
input_segment_in = tf.keras.layers.Input(shape=(512,), name='segment_token', dtype='int32')

embedding_layer = transformer_model({'inputs': input_ids_in,
                                     'attention_mask': input_masks_in,
                                     'token_type_ids': input_segment_in})[0]
X = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(50, return_sequences=True,
                                                       dropout=0.1, recurrent_dropout=0.1))(embedding_layer)
X = tf.keras.layers.GlobalMaxPool1D()(X)

start_branch = tf.keras.layers.Dense(1024, activation='relu')(X)
start_branch = tf.keras.layers.Dropout(0.3)(start_branch)
start_branch_output = tf.keras.layers.Dense(512, activation='softmax', name='start_branch')(start_branch)

end_branch = tf.keras.layers.Dense(1024, activation='relu')(X)
end_branch = tf.keras.layers.Dropout(0.3)(end_branch)
end_branch_output = tf.keras.layers.Dense(512, activation='softmax', name='end_branch')(end_branch)

model = tf.keras.Model(inputs=[input_ids_in, input_masks_in, input_segment_in],
                       outputs=[start_branch_output, end_branch_output])
I am using a final softmax layer with 512 units because that is my maximum number of words, and my aim is to predict the index from it.
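For reference, the Transformers library also ships a model class with a built-in span-prediction head that outputs start and end logits over token positions directly; a minimal sketch (an alternative to the custom BiLSTM head above, not a claim that the approach above is wrong):
from transformers import DistilBertTokenizerFast, TFDistilBertForQuestionAnswering
import tensorflow as tf

tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")
model = TFDistilBertForQuestionAnswering.from_pretrained("distilbert-base-uncased")

enc = tokenizer("Who created SQuAD?", "SQuAD was created by researchers at Stanford.",
                return_tensors="tf", padding="max_length", max_length=512, truncation=True)
outputs = model(dict(enc))
start_idx = tf.argmax(outputs.start_logits, axis=-1)  # predicted start token position
end_idx = tf.argmax(outputs.end_logits, axis=-1)      # predicted end token position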

Variational Autoencoder in Keras: How to achieve different output of a Keras Layer at the time of training and prediction?

We're implementing the paper "Variational Autoencoders for Collaborative Filtering" in TF 2.0.
The sample implementation of the above paper in TF 1.0 is given here.
The paper proposes an implementation of a Variational Autoencoder for collaborative filtering. As the output of the encoder, it uses the reparametrization trick to sample the latent vector Z at the time of training the network.
The reparametrization trick samples ϵ ∼ N(0, I_K) and reparametrizes the latent vector Z as:
Z_u = µ_ϕ(x_u) + ϵ ⊙ σ_ϕ(x_u), where µ_ϕ and σ_ϕ are computed from the output of the encoder.
But, at the time of prediction, the paper proposes to use only µϕ for sampling Z.
In our implementation, we used a custom tf.keras.layers.Layer to sample the latent vector Z. The following is the code of the architecture:
class Reparameterize(tf.keras.layers.Layer):
    """
    Custom layer.
    Reparameterization trick: sample random latent vectors Z from
    the latent Gaussian distribution.
    The sampled vector Z is given by
    sampled_z = mean + std * epsilon
    """
    def call(self, inputs):
        Z_mu, Z_logvar = inputs
        Z_sigma = tf.math.exp(0.5 * Z_logvar)
        epsilon = tf.random.normal(tf.shape(Z_sigma))
        return Z_mu + Z_sigma * epsilon

class VAE:
    def __init__(self, input_dim, latent_dim=200):
        # encoder
        encoder_input = Input(shape=input_dim)
        X = tf.math.l2_normalize(encoder_input, 1)
        X = Dropout(0.5)(X)
        X = Dense(600, activation='tanh')(X)
        Z_mu = Dense(latent_dim)(X)
        Z_logvar = Dense(latent_dim)(X)
        sampled_Z = Reparameterize()([Z_mu, Z_logvar])

        # decoder
        decoder_input = Input(shape=latent_dim)
        X = Dense(600, activation='tanh')(decoder_input)
        logits = Dense(input_dim)(X)

        # define losses
        """
        custom loss function
        def loss(X_true, X_pred)
        """

        # create models
        self.encoder = Model(encoder_input, [Z_logvar, Z_mu, sampled_Z], name='encoder')
        self.decoder = Model(decoder_input, logits, name='decoder')
        self.vae = Model(encoder_input, self.decoder(sampled_Z), name='vae')
        self.vae.add_loss(kl_divergence(Z_logvar, Z_mu))

        # compile the model
        self.vae.compile(optimizer='adam', loss=loss, metrics=[loss])
Now, I am looking for a way to change the implementation of the custom Reparameterize layer at the time of prediction to use only µϕ (Z_mu) for sampling Z so as to achieve what is proposed by the paper mentioned above.
Or if there's another way of doing so in Tf 2.0, kindly recommend.
You could do:
# create your VAE model
my_vae = VAE(input_dim = my_input_dim)
# Train it as you wish
# .....
When training is done, you could use it as follows:
inp = Input(shape = my_input_dim)
_, Z_mu,_ = my_vae.encoder(inp) # my_vae is your trained model, get its outputs
decoder_output = my_vae.decoder(Z_mu) # use the Z_mu as input to decoder
vae_predictor = Model(inp, decoder_output) # create your prediction time model
You could use the vae_predictor model now for predictions.
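An alternative to rebuilding a prediction-time model (a sketch, not part of the answer above): Keras passes a training flag into a layer's call, so the Reparameterize layer itself can fall back to Z_mu at inference time:
class Reparameterize(tf.keras.layers.Layer):
    """Sample Z = mu + sigma * epsilon while training, return mu at inference."""
    def call(self, inputs, training=None):
        Z_mu, Z_logvar = inputs
        if not training:
            return Z_mu  # prediction time: use only the mean, as the paper proposes
        Z_sigma = tf.math.exp(0.5 * Z_logvar)
        epsilon = tf.random.normal(tf.shape(Z_sigma))
        return Z_mu + Z_sigma * epsilon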