Convert Keras model to PyTorch

Is there an easy way to convert a model like this from Keras to PyTorch?
I have the following code in Keras:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

state_dim = 10
architecture = (256, 256)  # units per layer
learning_rate = 0.0001     # learning rate
l2_reg = 0.00000001        # L2 regularization
trainable = True
num_actions = 3

layers = []
n = len(architecture)  # n = 2
for i, units in enumerate(architecture, 1):
    layers.append(Dense(units=units,
                        input_dim=state_dim if i == 1 else None,
                        activation='relu',
                        kernel_regularizer=l2(l2_reg),
                        name=f'Dense_{i}',
                        trainable=trainable))
layers.append(Dropout(.1))
layers.append(Dense(units=num_actions,
                    trainable=trainable,
                    name='Output'))
model = Sequential(layers)
model.compile(loss='mean_squared_error',
              optimizer=Adam(lr=learning_rate))
Which outputs the following:
Model: "sequential_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
Dense_1 (Dense) (None, 256) 2816
_________________________________________________________________
Dense_2 (Dense) (None, 256) 65792
_________________________________________________________________
dropout_3 (Dropout) (None, 256) 0
_________________________________________________________________
Output (Dense) (None, 3) 771
=================================================================
Total params: 69,379
Trainable params: 69,379
Non-trainable params: 0
_________________________________________________________________
None
I must admit, I'm a little out of my depth, so any advice is appreciated. I'm trying to read through the PyTorch docs and will update my question with a possible answer if I manage.

Here is my best attempt:
state_dim = 10
architecture = (256, 256)  # units per layer
learning_rate = 0.0001     # learning rate
l2_reg = 0.00000001        # L2 regularization
trainable = True
num_actions = 3

import torch
from torch import nn

class CustomModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(state_dim, architecture[0]),
            nn.ReLU(),
            nn.Linear(architecture[0], architecture[1]),
            nn.ReLU(),
            nn.Dropout(0.25),
            nn.Linear(architecture[1], num_actions),
        )

    def forward(self, x):
        return self.layers(x)

model = CustomModel()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
Printing the model gives promising-looking output:
CustomModel(
  (layers): Sequential(
    (0): Linear(in_features=10, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=256, bias=True)
    (3): ReLU()
    (4): Dropout(p=0.25, inplace=False)
    (5): Linear(in_features=256, out_features=3, bias=True)
  )
)
However, a few items are still left unanswered:
- are the activations in the right place?
- how do we add a kernel_regularizer = l2(l2_reg) to the first two Linear/Dense layers?
- how do we make the layers trainable?
Any input appreciated.
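Update: for the three open questions, here is a sketch of how the pieces seem to map over (an assumption-laden sketch, not a verified equivalence). The ReLU after each hidden Linear matches Keras' activation='relu'. PyTorch has no per-layer kernel_regularizer; the usual stand-in is the optimizer's weight_decay, which can be restricted to the first two Linear layers via parameter groups. Since Keras' l2(l2_reg) adds l2_reg * sum(w**2) to the loss (gradient 2 * l2_reg * w), weight_decay = 2 * l2_reg should match it, though it also decays the biases, unlike kernel_regularizer. And "trainable" maps to requires_grad, which is True by default.

# the two hidden Linear layers sit at indices 0 and 2 of the nn.Sequential
decay_params = list(model.layers[0].parameters()) + list(model.layers[2].parameters())
decay_ids = {id(p) for p in decay_params}
other_params = [p for p in model.parameters() if id(p) not in decay_ids]
optimizer = torch.optim.Adam(
    [{'params': decay_params, 'weight_decay': 2 * l2_reg},  # L2 on the first two layers only
     {'params': other_params, 'weight_decay': 0.0}],
    lr=learning_rate)

# parameters are trainable by default; this toggles it explicitly
for p in model.parameters():
    p.requires_grad_(trainable)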

Related

With `with strategy.scope():`, BERT output from tf-hub loses its shape and `encoder_output` is missing

Code:
!pip install tensorflow-text==2.7.0

import tensorflow_text as text
import tensorflow_hub as hub
# ... other tf imports....

strategy = tf.distribute.MirroredStrategy()
print('Number of GPU: ' + str(strategy.num_replicas_in_sync))  # 1 or 2, shouldn't matter

NUM_CLASS = 2

with strategy.scope():
    bert_preprocess = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3")
    bert_encoder = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4")

def get_model():
    text_input = Input(shape=(), dtype=tf.string, name='text')
    preprocessed_text = bert_preprocess(text_input)
    outputs = bert_encoder(preprocessed_text)
    output_sequence = outputs['sequence_output']
    x = Dense(NUM_CLASS, activation='sigmoid')(output_sequence)
    model = Model(inputs=[text_input], outputs=[x])
    return model

optimizer = Adam()
model = get_model()
model.compile(loss=CategoricalCrossentropy(from_logits=True), optimizer=optimizer, metrics=[Accuracy()])
model.summary()  # <- look at output 1
tf.keras.utils.plot_model(model, show_shapes=True, to_file='model.png')  # <- look at figure 1

with strategy.scope():
    optimizer = Adam()
    model = get_model()
    model.compile(loss=CategoricalCrossentropy(from_logits=True), optimizer=optimizer, metrics=[Accuracy()])
    model.summary()  # <- compare with output 1, it has already lost its shape
    tf.keras.utils.plot_model(model, show_shapes=True, to_file='model_scoped.png')  # <- compare this figure too, for ease
With scope, BERT loses seq_length, and it becomes None.
Model summary without scope (note the 128 in the very last layer, which is seq_length):
Model: "model_6"
__________________________________________________________________________________________________
 Layer (type)                  Output Shape                     Param #     Connected to
==================================================================================================
 text (InputLayer)             [(None,)]                        0           []
 keras_layer_2 (KerasLayer)    {'input_mask': (None, 128),      0           ['text[0][0]']
                                'input_word_ids': (None, 128),
                                'input_type_ids': (None, 128)}
 keras_layer_3 (KerasLayer)    multiple                         109482241   ['keras_layer_2[6][0]',
                                                                             'keras_layer_2[6][1]',
                                                                             'keras_layer_2[6][2]']
 dense_6 (Dense)               (None, 128, 2)                   1538        ['keras_layer_3[6][14]']
==================================================================================================
Total params: 109,483,779
Trainable params: 1,538
Non-trainable params: 109,482,241
__________________________________________________________________________________________________
Model with scope:
Model: "model_7"
__________________________________________________________________________________________________
 Layer (type)                  Output Shape                     Param #     Connected to
==================================================================================================
 text (InputLayer)             [(None,)]                        0           []
 keras_layer_2 (KerasLayer)    {'input_mask': (None, 128),      0           ['text[0][0]']
                                'input_word_ids': (None, 128),
                                'input_type_ids': (None, 128)}
 keras_layer_3 (KerasLayer)    multiple                         109482241   ['keras_layer_2[7][0]',
                                                                             'keras_layer_2[7][1]',
                                                                             'keras_layer_2[7][2]']
 dense_7 (Dense)               (None, None, 2)                  1538        ['keras_layer_3[7][14]']
==================================================================================================
Total params: 109,483,779
Trainable params: 1,538
Non-trainable params: 109,482,241
__________________________________________________________________________________________________
Another notable thing: `encoder_outputs` is also missing if you look at the 2nd or 3rd Keras layer of both models.

Converting GRU layer from PyTorch to TensorFlow

I am trying to convert the following GRU layer from PyTorch (1.9.1) to TensorFlow (2.6.0):
# GRU layer
self.gru = nn.GRU(64, 32, bidirectional=True, num_layers=2, dropout=0.25, batch_first=True)
I am unsure about my current implementation, especially regarding the conversion of the parameters bidirectional and num_layers. My current reconstruction is the following:
# GRU Layer
model.add(Bidirectional(GRU(32, return_sequences=True, dropout=0.25, time_major=False)))
model.add(Bidirectional(GRU(32, return_sequences=True, dropout=0.25, time_major=False)))
Am I missing something? Thanks for your help in advance!
Yes, these two models are the same, at least in terms of parameter count and output shape:
In PyTorch:
import torch
from torchinfo import summary

model = torch.nn.Sequential(torch.nn.GRU(64, 32, bidirectional=True, num_layers=2, dropout=0.25, batch_first=True))
batch_size = 16
summary(model, input_size=(batch_size, 100, 64))

==========================================================================================
Layer (type:depth-idx)                   Output Shape              Param #
==========================================================================================
Sequential                               --                        --
├─GRU: 1-1                               [16, 100, 64]             37,632
==========================================================================================
Total params: 37,632
Trainable params: 37,632
Non-trainable params: 0
Total mult-adds (M): 60.21
==========================================================================================
Input size (MB): 0.41
Forward/backward pass size (MB): 0.82
Params size (MB): 0.15
Estimated Total Size (MB): 1.38
==========================================================================================
In TensorFlow:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Bidirectional, GRU
# GRU Layer
model = Sequential()
model.add(Bidirectional(GRU(32, return_sequences=True, dropout=0.25, time_major=False)))
model.add(Bidirectional(GRU(32, return_sequences=True, dropout=0.25, time_major=False)))
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), loss='mse')
a = model.call(inputs=tf.random.normal(shape=(16, 100, 64)))
model.summary()
Model: "sequential_4"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
bidirectional_8 (Bidirection (16, 100, 64) 18816
_________________________________________________________________
bidirectional_9 (Bidirection (16, 100, 64) 18816
=================================================================
Total params: 37,632
Trainable params: 37,632
Non-trainable params: 0
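As a sanity check, the 37,632 can be derived by hand. With two bias vectors per gate (PyTorch's bias_ih/bias_hh, which matches Keras' reset_after=True default in TF 2.x), each GRU direction has 3 * (input_dim * units + units * units + 2 * units) parameters:

def gru_direction_params(input_dim, units):
    # 3 gates, each with an input kernel, a recurrent kernel, and two bias vectors
    return 3 * (input_dim * units + units * units + 2 * units)

layer1 = 2 * gru_direction_params(64, 32)      # bidirectional           -> 18,816
layer2 = 2 * gru_direction_params(2 * 32, 32)  # sees concatenated 64-d  -> 18,816
print(layer1 + layer2)                         # 37,632 in both frameworks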

Issue in removing layer from a pretrained model

I have the following code; I need to remove some layers of the model and perform prediction, but currently I am getting an error.
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
import numpy as np
from keras.models import Model
from tensorflow.python.keras.optimizers import SGD

base_model = ResNet50(include_top=False, weights='imagenet')
model = Model(inputs=base_model.input, outputs=base_model.layers[-2].output)
#model = Model(inputs=base_model.input, outputs=predictions)

# Compiling the model
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy'])

img_path = 'elephant.jpg'
img = image.load_img(img_path, target_size=(224, 224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)

preds = model.predict(x)
# decode the results into a list of tuples (class, description, probability)
# (one such list for each sample in the batch)
print('Predicted:', decode_predictions(preds, top=3)[0])
Error:
  File "C:/Users/learn/remove_layer.py", line 9, in <module>
    model = Model(inputs=base_model.input, outputs=base_model.layers[-2].output)
AttributeError: 'Tensor' object has no attribute '_keras_shape'
With my beginner's knowledge of Keras, what I understood is that this is a shape issue. Since it's a ResNet model, I'd like to remove layers from one merge layer to another merge layer, because merge layers don't have dimension issues. How can I accomplish this?
You actually need to visualize what you have done, so let's do a little summary of the last layers of the ResNet50 model:
base_model.summary()

conv5_block3_2_relu (Activation)       (None, None, None, 512)    0        conv5_block3_2_bn[0][0]
__________________________________________________________________________________________________
conv5_block3_3_conv (Conv2D)           (None, None, None, 2048)   1050624  conv5_block3_2_relu[0][0]
__________________________________________________________________________________________________
conv5_block3_3_bn (BatchNormalization) (None, None, None, 2048)   8192     conv5_block3_3_conv[0][0]
__________________________________________________________________________________________________
conv5_block3_add (Add)                 (None, None, None, 2048)   0        conv5_block2_out[0][0]
                                                                           conv5_block3_3_bn[0][0]
__________________________________________________________________________________________________
conv5_block3_out (Activation)          (None, None, None, 2048)   0        conv5_block3_add[0][0]
==================================================================================================
Total params: 23,587,712
Trainable params: 23,534,592
Non-trainable params: 53,120
__________________________________________________________________________________________________
And now your model after removing the last layer:
model.summary()

conv5_block3_2_relu (Activation)       (None, None, None, 512)    0        conv5_block3_2_bn[0][0]
__________________________________________________________________________________________________
conv5_block3_3_conv (Conv2D)           (None, None, None, 2048)   1050624  conv5_block3_2_relu[0][0]
__________________________________________________________________________________________________
conv5_block3_3_bn (BatchNormalization) (None, None, None, 2048)   8192     conv5_block3_3_conv[0][0]
__________________________________________________________________________________________________
conv5_block3_add (Add)                 (None, None, None, 2048)   0        conv5_block2_out[0][0]
                                                                           conv5_block3_3_bn[0][0]
==================================================================================================
Total params: 23,587,712
Trainable params: 23,534,592
Non-trainable params: 53,120
ResNet50 in Keras outputs the feature map after the last Conv2D block; it doesn't care about the classification part of your model. What you actually did is just remove the last activation layer after the last addition block.
So you need to check further which block layer you want to remove, and add a Flatten and fully connected layer for the classification part.
Also, as mentioned by Dr. Snoopy, don't mix imports between keras and tensorflow.keras:
# this part
from tensorflow.keras.models import Model
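As a concrete sketch of that last point (the layer name and class count here are placeholders, and the new head is untrained, so decode_predictions won't give meaningful ImageNet labels until you retrain): cut at a residual block boundary and add a pooling + classification head:

from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense

base_model = ResNet50(include_top=False, weights='imagenet')
# conv4_block6_out closes the conv4 stage, so it's a clean place to cut
x = base_model.get_layer('conv4_block6_out').output
x = GlobalAveragePooling2D()(x)               # handles the (None, None) spatial dims
outputs = Dense(10, activation='softmax')(x)  # 10 = placeholder class count
model = Model(inputs=base_model.input, outputs=outputs)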

Transfer learning for video classification

How can I use pre-trained models to train a video classification model? My dataset's shape is (4000, 10, 150, 150, 1), and I'm trying to do human action recognition with TimeDistributed Conv2D layers.
I can train without transfer learning, but I get poor accuracy.
What I have tried:
from keras.applications import VGG16

conv_base = VGG16(weights='imagenet',
                  include_top=False,
                  input_shape=(150, 150, 3))

model = models.Sequential()
model.add(conv_base)
model.add(TimeDistributed(Conv2D(96, (3, 3), padding='same',
                                 input_shape=x_train.shape[1:])))
model.add(TimeDistributed(Activation('relu')))
model.add(TimeDistributed(Conv2D(128, (3, 3))))
model.add(TimeDistributed(Activation('relu')))
model.add(TimeDistributed(MaxPooling2D(pool_size=(2, 2))))
model.add(TimeDistributed(Dropout(0.35)))
...
But I got: ValueError: strides should be of length 1, 1 or 3 but was 2
Does anyone have any ideas?
I'm assuming you have 10 frames for each video. It's a simple model which uses VGG16 features (GlobalAveragePooling) for each frame and an LSTM to classify the frame sequences.
You can experiment by adding a few more layers or changing hyperparameters.
N.B.: There are many inconsistencies in your model, including passing 5-d data directly to VGG16, which expects 4-d data.
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
import numpy as np
from tensorflow.keras.applications import VGG16

IMG_SIZE = (150, 150, 3)
num_class = 3

def create_base():
    conv_base = VGG16(weights='imagenet',
                      include_top=False,
                      input_shape=(150, 150, 3))
    x = GlobalAveragePooling2D()(conv_base.output)
    base_model = Model(conv_base.input, x)
    return base_model

conv_base = create_base()

ip = Input(shape=(10, 150, 150, 3))
t_conv = TimeDistributed(conv_base)(ip)  # vgg16 feature extractor per frame
t_lstm = LSTM(10, return_sequences=False)(t_conv)
f_softmax = Dense(num_class, activation='softmax')(t_lstm)
model = Model(ip, f_softmax)
model.summary()
Model: "model_5"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_32 (InputLayer) [(None, 10, 150, 150, 3)] 0
_________________________________________________________________
time_distributed_4 (TimeDist (None, 10, 512) 14714688
_________________________________________________________________
lstm_1 (LSTM) (None, 10) 20920
_________________________________________________________________
dense (Dense) (None, 3) 33
=================================================================
Total params: 14,735,641
Trainable params: 14,735,641
Non-trainable params: 0
________________________
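One remaining mismatch worth noting: the question's data has a single channel, while VGG16 expects 3. A minimal training sketch (assuming x_train has shape (4000, 10, 150, 150, 1) and y_train holds integer class labels; both names are placeholders) could tile the channel axis and train as usual:

x_rgb = np.repeat(x_train, 3, axis=-1)  # (4000, 10, 150, 150, 1) -> (4000, 10, 150, 150, 3)
model.compile(optimizer=Adam(1e-4),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x_rgb, y_train, batch_size=8, epochs=10, validation_split=0.1)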

Serializing a Keras model with an embedding layer

I've trained a model with pre-trained word embeddings like this:
embedding_matrix = np.zeros((vocab_size, 100))
for word, i in text_tokenizer.word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

embedding_layer = Embedding(vocab_size,
                            100,
                            embeddings_initializer=Constant(embedding_matrix),
                            input_length=50,
                            trainable=False)
With the architecture looking like this:
sequence_input = Input(shape=(50,), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)
text_cnn = Conv1D(filters=5, kernel_size=5, padding='same', activation='relu')(embedded_sequences)
text_lstm = LSTM(500, return_sequences=True)(embedded_sequences)
char_in = Input(shape=(50, 18, ))
char_cnn = Conv1D(filters=5, kernel_size=5, padding='same', activation='relu')(char_in)
char_cnn = GaussianNoise(0.40)(char_cnn)
char_lstm = LSTM(500, return_sequences=True)(char_in)
merged = concatenate([char_lstm, text_lstm])
merged_d1 = Dense(800, activation='relu')(merged)
merged_d1 = Dropout(0.5)(merged_d1)
text_class = Dense(len(y_unique), activation='softmax')(merged_d1)
model = Model([sequence_input,char_in], text_class)
When I go to convert the model to json, I get this error:
ValueError: can only convert an array of size 1 to a Python scalar
Similarly, if I use the model.save() function, it seems to save correctly, but when I go to load it, I get TypeError: Expected Float32.
My question is: is there something I am missing when trying to serialize this model? Do I need some sort of Lambda layer or something of the sorts?
Any help would be greatly appreciated!
You can use the weights argument in Embedding layer to provide initial weights.
embedding_layer = Embedding(vocab_size,
                            100,
                            weights=[embedding_matrix],
                            input_length=50,
                            trainable=False)
The weights should remain non-trainable after model saving/loading:
model.save('1.h5')
m = load_model('1.h5')
m.summary()
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_3 (InputLayer) (None, 50) 0
__________________________________________________________________________________________________
input_4 (InputLayer) (None, 50, 18) 0
__________________________________________________________________________________________________
embedding_1 (Embedding) (None, 50, 100) 1000000 input_3[0][0]
__________________________________________________________________________________________________
lstm_4 (LSTM) (None, 50, 500) 1038000 input_4[0][0]
__________________________________________________________________________________________________
lstm_3 (LSTM) (None, 50, 500) 1202000 embedding_1[0][0]
__________________________________________________________________________________________________
concatenate_2 (Concatenate) (None, 50, 1000) 0 lstm_4[0][0]
lstm_3[0][0]
__________________________________________________________________________________________________
dense_2 (Dense) (None, 50, 800) 800800 concatenate_2[0][0]
__________________________________________________________________________________________________
dropout_2 (Dropout) (None, 50, 800) 0 dense_2[0][0]
__________________________________________________________________________________________________
dense_3 (Dense) (None, 50, 15) 12015 dropout_2[0][0]
==================================================================================================
Total params: 4,052,815
Trainable params: 3,052,815
Non-trainable params: 1,000,000
__________________________________________________________________________________________________
I hope you are saving the model after compiling. Like:
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
To save the model, you can do:
from keras.models import load_model

model.save('model.h5')
model = load_model('model.h5')
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)
To load the model:
from keras.models import model_from_json

json_file = open('model.json', 'r')
model_json = json_file.read()
model = model_from_json(model_json)
model.load_weights("model.h5")
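To double-check that the frozen embeddings survive the round trip, a quick sanity check (assuming the layer is named 'embedding_1' as in the summary above):

import numpy as np
from keras.models import load_model

m = load_model('model.h5')
orig = model.get_layer('embedding_1').get_weights()[0]
reloaded = m.get_layer('embedding_1').get_weights()[0]
print(np.allclose(orig, reloaded))           # expect True
print(m.get_layer('embedding_1').trainable)  # expect False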
I tried multiple methods. The problem is that when the model has an embedding layer, pickle doesn't work and is not able to save the data.
So what you can do, when you have layers like these:
## Creating model
embedding_vector_features = 100
model = Sequential()
model.add(Embedding(voc_size, embedding_vector_features, input_length=sent_length))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
then you can use the h5 extension to save the file and then convert it to JSON (the model is loaded back as model2 here):
from tensorflow.keras.models import load_model

model.save('model.h5')
model = load_model('model.h5')
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)
and this to load it back:
from tensorflow.keras.models import model_from_json

json_file = open('model.json', 'r')
model_json = json_file.read()
model2 = model_from_json(model_json)
model2.load_weights("model.h5")
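On newer TF/Keras versions, the native format sidesteps the h5/JSON dance; a minimal sketch, assuming TF 2.12 or later:

import tensorflow as tf

model.save('model.keras')  # native Keras format: architecture + weights in one file
model2 = tf.keras.models.load_model('model.keras')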