How to replace (or insert) intermediate layer in Keras model? - tensorflow

I have a trained Keras model and I would like:
1) to replace a Conv2D layer with the same layer but without bias.
2) to add BatchNormalization layer before first Activation
How can I do this?
def keras_simple_model():
    """Build a small functional-API CNN for 28x28 single-channel inputs.

    Two 4-filter and two 8-filter 3x3 'same' convolutions are each followed
    by a separate ReLU ``Activation`` layer, with a 2x2 max-pool after each
    pair; global average pooling and a 10-unit softmax head finish the net.

    Returns:
        The uncompiled ``Model`` mapping the input to the softmax output.
    """
    from keras.models import Model
    from keras.layers import Input, Dense, GlobalAveragePooling2D
    from keras.layers import Conv2D, MaxPooling2D, Activation

    def conv_relu(tensor, filters, name):
        # Convolution with no fused activation, then a standalone ReLU layer.
        tensor = Conv2D(filters, (3, 3), activation=None, padding='same',
                        name=name)(tensor)
        return Activation('relu')(tensor)

    image = Input((28, 28, 1))

    features = conv_relu(image, 4, 'conv1')
    features = conv_relu(features, 4, 'conv2')
    features = MaxPooling2D((2, 2), strides=(2, 2), name='pool1')(features)

    features = conv_relu(features, 8, 'conv3')
    features = conv_relu(features, 8, 'conv4')
    features = MaxPooling2D((2, 2), strides=(2, 2), name='pool2')(features)

    pooled = GlobalAveragePooling2D()(features)
    logits = Dense(10, activation=None)(pooled)
    probabilities = Activation('softmax')(logits)
    return Model(inputs=image, outputs=probabilities)
if __name__ == '__main__':
    model = keras_simple_model()
    # summary() prints the table itself and returns None, so wrapping it in
    # print() would only add a stray "None" line.
    model.summary()

The following function allows you to insert a new layer before, after or to replace each layer in the original model whose name matches a regular expression, including non-sequential models such as DenseNet or ResNet.
import re
from keras.models import Model
def insert_layer_nonseq(model, layer_regex, insert_layer_factory,
                        insert_layer_name=None, position='after'):
    """Rebuild ``model`` with a new layer inserted at every matching layer.

    The graph is first parsed into a map "layer name -> names of the layers
    feeding it"; the layers are then re-applied in ``model.layers`` order,
    so models with branches and merges are supported.

    Args:
        model: source model. ``model.layers[0]`` is treated as the input
            layer and seeded with ``model.input``.
        layer_regex: pattern tested with ``re.match`` against each layer
            name (anchored at the start of the name).
        insert_layer_factory: zero-argument callable returning a fresh
            layer; called once per matching layer.
        insert_layer_name: if given, assigned as the name of every inserted
            layer; otherwise the name is ``'<old layer name>_<new name>'``.
        position: ``'before'``, ``'after'`` or ``'replace'`` relative to
            each matching layer.

    Returns:
        A new ``Model`` with the same inputs, whose outputs are the rebuilt
        tensors of the layers listed in ``model.output_names``.

    Raises:
        ValueError: if a layer matches and ``position`` is not one of the
            three accepted values.
    """
    # Auxiliary dictionary to describe the network graph
    network_dict = {'input_layers_of': {}, 'new_output_tensor_of': {}}

    # Set the input layers of each layer
    for layer in model.layers:
        for node in layer._outbound_nodes:
            consumer_name = node.outbound_layer.name
            network_dict['input_layers_of'].setdefault(
                consumer_name, []).append(layer.name)

    # Set the output tensor of the input layer
    network_dict['new_output_tensor_of'].update(
        {model.layers[0].name: model.input})

    # Iterate over all layers after the input
    model_outputs = []
    for layer in model.layers[1:]:

        # Determine input tensors
        layer_input = [network_dict['new_output_tensor_of'][layer_aux]
                       for layer_aux in network_dict['input_layers_of'][layer.name]]
        if len(layer_input) == 1:
            layer_input = layer_input[0]

        # Insert layer if name matches the regular expression
        if re.match(layer_regex, layer.name):
            if position in ('replace', 'before'):
                # The new layer consumes what the old layer used to consume.
                # ('before' previously fell through and reused the tensor
                # left over from the previous iteration.)
                x = layer_input
            elif position == 'after':
                x = layer(layer_input)
            else:
                raise ValueError('position must be: before, after or replace')

            new_layer = insert_layer_factory()
            # NOTE(review): assigning .name assumes the attribute is writable
            # on the Keras version in use -- confirm.
            if insert_layer_name:
                new_layer.name = insert_layer_name
            else:
                new_layer.name = '{}_{}'.format(layer.name,
                                                new_layer.name)
            x = new_layer(x)
            print('New layer: {} Old layer: {} Type: {}'.format(new_layer.name,
                                                            layer.name, position))
            if position == 'before':
                x = layer(x)
        else:
            x = layer(layer_input)

        # Set new output tensor (the original one, or the one of the inserted
        # layer)
        network_dict['new_output_tensor_of'].update({layer.name: x})

        # Save tensor in output list if it is output in initial model.
        # Must test the layer being rebuilt, not the variable left over from
        # the graph-parsing loop above.
        if layer.name in model.output_names:
            model_outputs.append(x)

    return Model(inputs=model.inputs, outputs=model_outputs)
The difference with respect to the simpler case of a purely sequential model is that before iterating over the layers to find the key layer, you first parse the graph and store the input layers of each layer in an auxiliary dictionary. Then, as you iterate over the layers, you also store the new output tensor of each layer, which is used to determine the input layers of each layer, when building the new model.
A use case would be the following, where a Dropout layer is inserted after each activation layer of ResNet50:
from keras.applications.resnet50 import ResNet50
from keras.layers import Dropout  # used by the factory below
from keras.models import load_model

model = ResNet50()


def dropout_layer_factory():
    """Return a fresh Dropout layer; called once per insertion point."""
    return Dropout(rate=0.2, name='dropout')


# Insert a Dropout after every layer whose name matches the pattern
# (position defaults to 'after').
model = insert_layer_nonseq(model, '.*activation.*', dropout_layer_factory)

# Fix possible problems with new model
model.save('temp.h5')
model = load_model('temp.h5')

model.summary()

You can use the following functions:
from keras.models import Model
def replace_intermediate_layer_in_keras(model, layer_id, new_layer):
    """Rebuild a linear model with the layer at ``layer_id`` swapped out.

    Layers are re-applied one after another in ``model.layers`` order, each
    on the previous layer's output, so this only suits models that are a
    single chain of layers.

    Args:
        model: source model; ``model.layers[0]`` supplies the input tensor.
        layer_id: index into ``model.layers`` of the layer to replace.
            Index 0 is never replaced because the walk starts at index 1.
        new_layer: layer applied in place of the one at ``layer_id``.

    Returns:
        A new ``Model`` from the original input to the rebuilt output.
    """
    layers = list(model.layers)

    x = layers[0].output
    for i, layer in enumerate(layers[1:], start=1):
        x = new_layer(x) if i == layer_id else layer(x)

    # Use the inputs=/outputs= keywords, as the other snippets in this file do.
    return Model(inputs=layers[0].input, outputs=x)
def insert_intermediate_layer_in_keras(model, layer_id, new_layer):
    """Rebuild a linear model with ``new_layer`` inserted before ``layer_id``.

    Layers are re-applied one after another in ``model.layers`` order, each
    on the previous layer's output, so this only suits models that are a
    single chain of layers.

    Args:
        model: source model; ``model.layers[0]`` supplies the input tensor.
        layer_id: index into ``model.layers``; ``new_layer`` is applied just
            before the layer at this index. Index 0 never triggers an
            insertion because the walk starts at index 1.
        new_layer: layer to insert.

    Returns:
        A new ``Model`` from the original input to the rebuilt output.
    """
    layers = list(model.layers)

    x = layers[0].output
    for i, layer in enumerate(layers[1:], start=1):
        if i == layer_id:
            x = new_layer(x)
        x = layer(x)

    # Use the inputs=/outputs= keywords, as the other snippets in this file do.
    return Model(inputs=layers[0].input, outputs=x)
Example:
from keras.layers import Conv2D, BatchNormalization
model = keras_simple_model()
# summary() prints the table itself and returns None; no print() needed.
model.summary()

# Replace the layer at index 3 with an equivalent convolution without bias.
# (Presumably index 3 is 'conv2' in keras_simple_model: input, conv1,
# activation, conv2 -- verify with the summary above.)
model = replace_intermediate_layer_in_keras(
    model, 3,
    Conv2D(
        4, (3, 3),
        activation=None,
        padding='same',
        name='conv2_repl',
        use_bias=False
    )
)
model.summary()

# Insert a BatchNormalization layer in front of the layer at index 4.
model = insert_intermediate_layer_in_keras(
    model, 4, BatchNormalization()
)
model.summary()
There are some limitations on replacements due to layer shapes, etc.

This is how I did it:
import keras
from keras.models import Model
from tqdm import tqdm
from keras import backend as K
def make_list(X):
    """Return ``X`` itself if it is a list, otherwise ``[X]``."""
    return X if isinstance(X, list) else [X]
def list_no_list(X):
    """Unwrap a one-element sequence; return anything else unchanged."""
    if len(X) != 1:
        return X
    return X[0]
def replace_layer(model, replace_layer_subname, replacement_fn,
                  **kwargs):
    """Rebuild ``model``, swapping every layer whose name contains a substring.

    args:
        model :: keras.models.Model instance
        replace_layer_subname :: str -- if str in layer name, replace it
        replacement_fn :: fn called as ``replacement_fn(old_layer=layer,
            **kwargs)`` for every use of a matching layer; it must return a
            callable (typically a new layer) that is applied to the replaced
            layer's input tensor(s)
            > presumably the replacement's output must be shape-compatible
              with what the downstream layers expect -- verify per model
        **kwargs :: forwarded unchanged to every ``replacement_fn`` call
    returns:
        new model with replaced layers
    quick examples:
        want to just remove all layers with 'batch_norm' in the name:
            > new_model = replace_layer(model, 'batch_norm', lambda **kwargs: (lambda u: u))
        want to replace all Conv1D(N, m, padding='same') with an LSTM (lets say all have 'conv1d' in name)
            > new_model = replace_layer(model, 'conv1d', lambda old_layer, **kwargs: LSTM(units=old_layer.filters, return_sequences=True))
    """
    model_inputs = []
    model_outputs = []
    tsr_dict = {}  # original tensor name -> tensor in the rebuilt graph

    model_output_names = [out.name for out in make_list(model.output)]

    for layer in model.layers:
        ### Loop if layer is used multiple times
        for j in range(len(layer._inbound_nodes)):

            ### setup model inputs
            # Layers are recognised as inputs purely by 'input' appearing in
            # their name; their output tensors are reused as-is.
            if 'input' in layer.name:
                for inpt_tsr in make_list(layer.get_output_at(j)):
                    model_inputs.append(inpt_tsr)
                    tsr_dict[inpt_tsr.name] = inpt_tsr
                continue

            ### check layer inp/outp
            inpt_names = [inp.name for inp in make_list(layer.get_input_at(j))]
            outp_names = [out.name for out in make_list(layer.get_output_at(j))]

            ### setup layer inputs
            inpt = list_no_list([tsr_dict[name] for name in inpt_names])

            ### remake layer
            if replace_layer_subname in layer.name:
                print('replacing '+layer.name)
                # Called once per node, so a layer used several times gets a
                # separate replacement for each use.
                x = replacement_fn(old_layer=layer, **kwargs)(inpt)
            else:
                x = layer(inpt)

            ### reinstantialize outputs into dict
            for name, out_tsr in zip(outp_names, make_list(x)):

                ### check if is an output
                if name in model_output_names:
                    model_outputs.append(out_tsr)
                tsr_dict[name] = out_tsr

    return Model(model_inputs, model_outputs)
I have a custom layer (taken from someone online) called BatchNormalizationFreeze, so an example of usage is this:
# replace_layer calls replacement_fn(old_layer=..., **kwargs) and applies the
# returned object to the input tensor, so the lambda must return the layer
# itself rather than the result of calling it.
new_model = replace_layer(model, 'batch_normal', lambda **kwargs: BatchNormalizationFreeze())
If you're going to replace multiple layers, just swap the replacement function for a pseudo-model that does them all at once.

Unfortunately replacing a layer is no small feat for models that do not follow the sequential pattern. For sequential patterns it is OK to just x = layer(x) and replace with new_layer when you see fit as in the previous answer.
However, for models that do not have a classic sequential pattern (say you have a simple "concatenation" of two columns) you have to actually "parse" the graph and use your "new_layer" (or layers) in the right places. Hope this is not too discouraging and happy graph parsing and reconstructing :)

Related

How to use TimeDistributed layer for predicting sequences of dynamic length? PYTHON 3

So I am trying to build an LSTM-based autoencoder, which I want to use for time series data. The data are split up into sequences of different lengths. The input to the model thus has shape [None, None, n_features], where the first None stands for the number of samples and the second for the time_steps of the sequence. The sequences are processed by an LSTM with the argument return_sequences=False; the coded dimension is then recreated by the RepeatVector function and run through an LSTM again. In the end I would like to use the TimeDistributed layer, but how do I tell Python that the time_steps dimension is dynamic? See my code:
from keras import backend as K
.... other dependencies .....
# Question snippet: LSTM autoencoder whose time-steps axis is left undefined.
input_ae = Input(shape=(None, 2)) # shape: time_steps, n_features
# Encoder: return_sequences=False keeps only the final output per sequence.
LSTM1 = LSTM(units=128, return_sequences=False)(input_ae)
# The repeat count is taken from the dynamic shape of the input (axis 1).
code = RepeatVector(n=K.shape(input_ae)[1])(LSTM1) # bottleneck layer
# Decoder: return_sequences=True emits one output per repeated step.
LSTM2 = LSTM(units=128, return_sequences=True)(code)
output = TimeDistributed(Dense(units=2))(LSTM2) # ??????? HOW TO ????
# no problem here so far:
model = Model(input_ae, outputs=output)
model.compile(optimizer='adam', loss='mse')
this function seems to do the trick
def repeat(x_inp):
    """Repeat a tensor along a new axis 1 to match a reference's axis-1 size.

    Args:
        x_inp: pair ``(x, inp)``. ``x`` gets a new axis inserted at
            position 1 and is repeated ``tf.shape(inp)[1]`` times along it.

    Returns:
        The expanded and repeated tensor.
    """
    vector, reference = x_inp
    expanded = tf.expand_dims(vector, 1)
    # The count is read from the dynamic shape, so it is resolved at run time.
    return tf.repeat(expanded, [tf.shape(reference)[1]], axis=1)
example
# Same autoencoder as in the question, with RepeatVector swapped for a Lambda
# around repeat(), which receives both the code vector and the input.
input_ae = Input(shape=(None, 2))
LSTM1 = LSTM(units=128, return_sequences=False)(input_ae)
code = Lambda(repeat)([LSTM1, input_ae])
LSTM2 = LSTM(units=128, return_sequences=True)(code)
output = TimeDistributed(Dense(units=2))(LSTM2)
model = Model(input_ae, output)
model.compile(optimizer='adam', loss='mse')
# Random data of shape (100, 30, 2); the model is fit to reproduce its input.
X = np.random.uniform(0,1, (100,30,2))
model.fit(X, X, epochs=5)
I'm using tf.keras with TF 2.2

How can I reduce the dimension of data, loaded through the flow_from_directory function of ImageDataGenerator?

Since I load my data (images) from the structured folders, I utilize the flow_from_directory function of the ImageDataGenerator class, which is provided by Keras. I've no issues while feeding this data to a CNN model. But when it comes to an LSTM model, getting the following error: ValueError: Error when checking input: expected lstm_1_input to have 3 dimensions, but got array with shape (64, 28, 28, 1). How can I reduce the dimension of the input data while reading it via ImageDataGenerator objects to be able to use an LSTM model instead of a CNN?
p.s. The shape of the input images is (28, 28) and they are grayscale.
# Generator reserving 20% of the images for validation.
train_valid_datagen = ImageDataGenerator(validation_split=0.2)
# Training subset: 28x28 grayscale images in batches of 64 -- the batches
# have the shape (64, 28, 28, 1) quoted in the error message.
train_gen = train_valid_datagen.flow_from_directory(
directory=TRAIN_IMAGES_PATH,
target_size=(28, 28),
color_mode='grayscale',
batch_size=64,
class_mode='categorical',
shuffle=True,
subset='training'
)
Update: The LSTM model code:
# Question snippet (kept as posted).
inp = Input(shape=(28, 28, 1))
# NOTE(review): this rebinds `inp` to the Lambda output, so the Model below
# is no longer given the original Input tensor as its input.
inp = Lambda(lambda x: squeeze(x, axis=-1))(inp) # from 4D to 3D
# return_sequences=True keeps one output per time step.
x = LSTM(num_units, dropout=dropout, recurrent_dropout=recurrent_dropout, activation=activation_fn, return_sequences=True)(inp)
x = BatchNormalization()(x)
x = Dense(128, activation=activation_fn)(x)
output = Dense(nb_classes, activation='softmax', kernel_regularizer=l2(0.001))(x)
model = Model(inputs=inp, outputs=output)
you start feeding your network with 4D data like your images in order to have the compatibility with ImageDataGenerator and then you have to reshape them in 3D format for LSTM.
These are the possibilities:
with only one channel you can simply squeeze the last dimension
# Single-channel case: drop the trailing axis before the LSTM.
inp = Input(shape=(28, 28, 1))
x = Lambda(lambda x: tf.squeeze(x, axis=-1))(inp) # from 4D to 3D
x = LSTM(32)(x)
if you have multiple channels (this is the case of RGB images or if would like to apply a RNN after a Conv2D) a solution can be this
# Multi-channel case: fold the last two axes into one feature axis.
inp = Input(shape=(28, 28, 1))
x = Conv2D(32, 3, padding='same', activation='relu')(inp)
# 28*32 merges the width axis with the 32 convolution channels.
x = Reshape((28,28*32))(x) # from 4D to 3D
x = LSTM(32)(x)
the fit can be computed as always with model.fit_generator
UPDATE: model review
# Reviewed version of the question's model: the Lambda output is bound to
# `x`, so `inp` still refers to the Input tensor when the Model is built.
inp = Input(shape=(28, 28, 1))
x = Lambda(lambda x: squeeze(x, axis=-1))(inp) # from 4D to 3D
# return_sequences=False: only the last output of the sequence is kept.
x = LSTM(32, dropout=dropout, recurrent_dropout=recurrent_dropout, activation=activation_fn, return_sequences=False)(x)
x = BatchNormalization()(x)
x = Dense(128, activation=activation_fn)(x)
output = Dense(nb_classes, activation='softmax', kernel_regularizer=l2(0.001))(x)
model = Model(inputs=inp, outputs=output)
model.summary()
Pay attention when you define the inp variable (don't overwrite it).
Set return_sequences=False in the LSTM in order to get a 2D output.

Converting keras functional model to keras class in tensorflow 2

I am trying to convert a Keras functional model into class derived from tensorflow.keras.models.Model and I'm facing 2 issues.
1. I need to multiply 2 layers using tensorflow.keras.layers.multiply, but it returns a ValueError: A merge layer should be called on a list of inputs.
2. If I remove this layer, thus working with a classical CNN, it returns a tensorflow.python.eager.core._SymbolicException: Inputs to eager execution function cannot be Keras symbolic tensors, but found [<tf.Tensor 'patch:0' shape=(None, 64, 64, 3) dtype=float32>].
I would appreciate some guidance to convert my code. I'm using Python 3.7, TensorFlow 2.0rc2 and Keras 2.3.0. The class I have defined is the following:
class TestCNN(Model):
    """
    conv1 > conv2 > fc1 > fc2 > alpha * fc2 > Sigmoid > output
    """

    def __init__(self, input_dimension, n_category, **kwargs):
        """
        Create the layers used by the network.

        :param input_dimension: tuple of int, theoretically (patch_size x patch_size x channels)
        :param n_category: int, the number of categories to classify
        :param kwargs: forwarded to the base ``Model`` constructor
        """
        super().__init__(name='testcnn', **kwargs)
        self.input_dimension = input_dimension
        self.n_category = n_category

        # Convolutional part.
        # NOTE(review): no kernel size is passed to Conv2D here -- confirm.
        self.conv1 = Conv2D(36, activation='relu', name='conv1/relu')
        self.conv1_maxpooling = MaxPooling2D((2, 2), name='conv1/maxpooling')
        self.conv2 = Conv2D(48, activation='relu', name='conv2/relu')
        self.conv2_maxpooling = MaxPooling2D((2, 2), name='conv2/maxpooling')

        # Fully connected part.
        self.flatten1 = Flatten(name='flatten1')
        self.fc1 = Dense(512, activation='relu', name='fc1/relu')
        self.fc2 = Dense(512, activation='relu', name='fc2/relu')

        # Custom layers defined elsewhere.
        self.alpha = TestLayer(layer_dim=128, name='alpha')
        self.output1 = TestSigmoid(output_dimension=n_category, name='output_layer')

    #tensorflow.function
    def call(self, x):
        """Run the forward pass on ``x`` and return the output layer's result."""
        stages = (
            self.conv1,
            self.conv1_maxpooling,
            self.conv2,
            self.conv2_maxpooling,
            self.flatten1,
            self.fc1,
            self.fc2,
        )
        for stage in stages:
            x = stage(x)
        # NOTE(review): alpha_output and fc2_output are not defined in this
        # method, and self.alpha is never applied -- kept as in the question.
        alpha_times_fc2 = multiply([alpha_output, fc2_output], name='alpha_times_fc2')
        return self.output1(alpha_times_fc2)

    def build(self, **kwargs):
        """Wire a symbolic input through ``call`` and re-initialise the base Model."""
        inputs = Input(shape=self.input_dimension, dtype='float32', name='patch')
        outputs = self.call(inputs)
        # Re-runs the base constructor, this time with inputs and outputs.
        super().__init__(name="TestCNN", inputs=inputs, outputs=outputs, **kwargs)
Then, in my main loop, I'm creating the instance as following:
# Instantiate the model; the category count comes from the training set.
testcnn = TestCNN(input_dimension=input_dimension, n_category=training_set.category_count)
# Adam hyper-parameters are read from the nested `parameter` config.
optimizer = tensorflow.keras.optimizers.Adam(
lr=parameter['training']['adam']['learning_rate'],
beta_1=parameter['training']['adam']['beta1'],
beta_2=parameter['training']['adam']['beta2'])
# NOTE(review): the list holds the TruePositives class itself, not an
# instance -- confirm this is intended.
metrics_list = [tensorflow.keras.metrics.TruePositives]
loss_function = tensorflow.keras.losses.categorical_crossentropy
loss_metrics = tensorflow.keras.metrics.Mean()
# build() is called without arguments; the class's own build() creates the
# Input from self.input_dimension.
testcnn.build()
testcnn.summary()
This code is raising the tensorflow.python.eager.core._SymbolicException. If I comment out some lines and return directly the results of the fc2 layer, I've got the ValueError.
I have commented out the build() function in my model and now call it in my main script as follows:
# With the custom build() commented out, this resolves to the inherited build().
testcnn.build(input_dimension)
# NOTE(review): `adam_optimizer` is not defined in the visible snippets (the
# optimizer above is bound to `optimizer`) -- confirm the name.
testcnn.compile(optimizer=adam_optimizer, loss=loss_function, metrics=metrics_list)
testcnn.summary()
Input dimension is a list formatted as following:
# Layout of the shape passed to build(): the batch size comes first.
input_dimension = (batch_size, image_size, image_size, channels)

Finetuning DNN with continuous outputs in the last layer

Greatly appreciate it if someone could help me out here:
I'm trying to do some finetuning on a regression task --- my inputs are 200X200 RGB images and my prediction output/label is a set of real values (let's say, within [0,10], though scaling is not a big deal here...?) --- on top of the InceptionV3 architecture. Here are my functions that take a pretrained Inception model, remove the last layer, add a new layer, and set it up for finetuning...
"""
Fine-tuning functions
"""
IM_WIDTH, IM_HEIGHT = 299, 299 #fixed size for InceptionV3
# NB_EPOCHS and BAT_SIZE are not referenced by the snippets visible here
# (the fit call uses `epochs` and a literal batch size of 32).
NB_EPOCHS = 3
BAT_SIZE = 32
# Width of the Dense layer(s) added on top of the base model.
FC_SIZE = 1024
# Number of leading layers frozen by setup_to_finetune_continuous().
NB_IV3_LAYERS_TO_FREEZE = 172
def eucl_dist(inputs):
    """Return the squared difference of a pair, summed over the last axis.

    Args:
        inputs: iterable of exactly two operands supporting ``-``, ``**``
            and a ``.sum(axis=...)`` method.
    """
    first, second = inputs
    squared_diff = (first - second)**2
    return squared_diff.sum(axis=-1)
def add_new_last_continuous_layer(base_model):
    """Add last layer to the convnet

    Stacks global average pooling, a ReLU Dense layer of FC_SIZE units and a
    Lambda wrapping eucl_dist on top of ``base_model``.

    Args:
        base_model: keras model excluding top, for instance:
            base_model = InceptionV3(weights='imagenet',include_top=False)
    Returns:
        new keras model with last layer
    """
    pooled = GlobalAveragePooling2D()(base_model.output)
    hidden = Dense(FC_SIZE, activation='relu')(pooled)
    # The Lambda is applied to a single tensor here; eucl_dist unpacks its
    # argument into two values.
    predictions = Lambda(eucl_dist, output_shape=(1,))(hidden)
    return Model(input=base_model.input, output=predictions)
def setup_to_finetune_continuous(model):
    """Freeze the bottom NB_IV3_LAYERS and retrain the remaining top
    layers.

    The first NB_IV3_LAYERS_TO_FREEZE layers are marked non-trainable, the
    rest trainable, and the model is then compiled with SGD.

    note: NB_IV3_LAYERS corresponds to the top 2 inception blocks in
    the inceptionv3 architecture
    Args:
        model: keras model
    """
    frozen = model.layers[:NB_IV3_LAYERS_TO_FREEZE]
    tunable = model.layers[NB_IV3_LAYERS_TO_FREEZE:]
    for group, flag in ((frozen, False), (tunable, True)):
        for layer in group:
            layer.trainable = flag
    sgd = SGD(lr=0.0001, momentum=0.9)
    model.compile(optimizer=sgd,
                  loss='eucl_dist')
Here are my implementations:
# Headless InceptionV3 with ImageNet weights; the input shape puts the
# channel axis first.
base_model = InceptionV3(weights = "imagenet",
include_top=False, input_shape=(3,200,200))
model0 = add_new_last_continuous_layer(base_model)
setup_to_finetune_continuous(model0)
# NOTE(review): `epochs` is not defined in the visible snippets (NB_EPOCHS
# is) -- confirm.
history=model0.fit(train_x, train_y, validation_data = (test_x, test_y), nb_epoch=epochs, batch_size=32)
scores = model0.evaluate(test_x, test_y, verbose = 0)
# NOTE(review): `X_train` is not defined in the visible snippets; presumably
# train_x is meant -- confirm.
features = model0.predict(X_train)
where train_x is a (168435, 3, 200, 200) numpy array and train_y is a (168435,) numpy array. The same goes for test_x and test_y except the number of observations is 42509.
I got the error `TypeError: Tensor object is not iterable`, which occurred at `predictions = Lambda(eucl_dist, output_shape=(1,))(x)` when going through the `add_new_last_continuous_layer()` function. Could anyone kindly give me some guidance on how to get around that, and on what the problem is? Greatly appreciated and happy holidays!
EDIT:
Changed the functions to:
def eucl_dist(inputs):
    """Return the squared difference of a pair, summed over the last axis.

    Args:
        inputs: iterable of exactly two operands supporting ``-``, ``**``
            and a ``.sum(axis=...)`` method.
    """
    first, second = inputs
    squared_diff = (first - second)**2
    return squared_diff.sum(axis=-1)
def add_new_last_continuous_layer(base_model):
    """Add last layer to the convnet

    Stacks global average pooling and two parallel ReLU Dense layers of
    FC_SIZE units on top of ``base_model``, then feeds both into a Lambda
    wrapping eucl_dist.

    Args:
        base_model: keras model excluding top, for instance:
            base_model = InceptionV3(weights='imagenet',include_top=False)
    Returns:
        new keras model with last layer
    """
    pooled = GlobalAveragePooling2D()(base_model.output)
    # Two independent Dense branches over the same pooled features.
    x1 = Dense(FC_SIZE, activation='relu')(pooled)
    x2 = Dense(FC_SIZE, activation='relu')(pooled)
    # NOTE(review): eucl_dist_shape is not defined in the visible snippets.
    distance = Lambda(eucl_dist, output_shape=eucl_dist_shape)([x1,x2])
    return Model(input=base_model.input, output=distance)
Your output shape for the lambda layer is wrong. Define your functions like this:
from keras import backend as K
def euclidean_distance(vects):
    """Return the Euclidean distance between a pair of tensors along axis 1.

    The summed squares keep their reduced axis (``keepdims=True``) and are
    clamped from below to ``K.epsilon()`` before the square root is taken.

    Args:
        vects: iterable of exactly two tensors.
    """
    left, right = vects
    sum_of_squares = K.sum(K.square(left - right), axis=1, keepdims=True)
    clamped = K.maximum(sum_of_squares, K.epsilon())
    return K.sqrt(clamped)
def eucl_dist_output_shape(shapes):
    """Return ``(first_dim, 1)`` using the first of the two input shapes.

    Args:
        shapes: iterable of exactly two shapes; only the first entry of the
            first shape is used.
    """
    first_shape, _ = shapes
    leading_dim = first_shape[0]
    return (leading_dim, 1)
# The Lambda is called on a list of two tensors, matching the pair that both
# helper functions unpack. input1 / input2 are not defined in this snippet;
# presumably they are the two tensors to compare -- verify.
predictions = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)([input1, input2])

Concatenating conv layers with different filter sizes in CNTK

In CNTK - how can I use several filter sizes on the same layer (e.g. filter sizes 2,3,4,5)?
Following the work done here (link to code in github below(1)), I want to take text, use an embedding layer, apply four different sizes of filters (2,3,4,5), concatenate the results and feed it to a fully connected layer.
Network architecture figure
Keras sample code:
# Keras sketch of the target architecture: one embedding feeding four
# convolution branches (filter lengths 2-5, 256 filters each) whose outputs
# are concatenated. getconvmodel is defined further down in this snippet and
# must exist before these lines run.
main_input = Input(shape=(100,))  # closing parenthesis was missing
embedding = Embedding(output_dim=32, input_dim=100, input_length=100, dropout=0)(main_input)
conv1 = getconvmodel(2,256)(embedding)
conv2 = getconvmodel(3,256)(embedding)
conv3 = getconvmodel(4,256)(embedding)
conv4 = getconvmodel(5,256)(embedding)
merged = merge([conv1,conv2,conv3,conv4],mode="concat")
def getconvmodel(filter_length,nb_filter):
    """Return a Sequential branch: 1-D convolution, sum_1d Lambda, dropout.

    Args:
        filter_length: length of the convolution filters.
        nb_filter: number of filters; also the Lambda's declared output size.
    """
    model = Sequential()
    # A stray markdown placeholder inside this call was removed.
    model.add(Convolution1D(nb_filter=nb_filter,
                            input_shape=(100,32),
                            filter_length=filter_length,
                            border_mode='same',
                            activation='relu',
                            subsample_length=1))
    # sum_1d is defined elsewhere (not visible in this snippet).
    model.add(Lambda(sum_1d, output_shape=(nb_filter,)))
    #model.add(BatchNormalization(mode=0))
    model.add(Dropout(0.5))
    return model
(1): /joshsaxe/eXposeDeepNeuralNetwork/blob/master/src/modeling/models.py
You can do something like this:
import cntk as C
import cntk.layers as cl
def getconvmodel(filter_length,nb_filter):
    """Return a function applying convolution, sum-reduction and dropout.

    Args:
        filter_length: filter shape passed to ``cl.Convolution``.
        nb_filter: number of filters passed to ``cl.Convolution``.
    """
    #Function
    def model(x):
        # An unbalanced closing parenthesis was removed from this call.
        f = cl.Convolution(filter_length, nb_filter, activation=C.relu)(x)
        f = C.reduce_sum(f, axis=0)
        f = cl.Dropout(0.5)(f)
        # Without this return the branch would evaluate to None.
        return f
    return model
# One embedding feeds four convolution branches (filter sizes 2-5, 256
# filters each); the branch outputs are then spliced together.
main_input = C.input_variable(100)
embedding = cl.Embedding(32)(main_input)
conv1 = getconvmodel(2,256)(embedding)
conv2 = getconvmodel(3,256)(embedding)
conv3 = getconvmodel(4,256)(embedding)
conv4 = getconvmodel(5,256)(embedding)
# NOTE(review): the branches are passed to splice as a single list -- check
# against the C.splice signature whether they should be unpacked.
merged = C.splice([conv1,conv2,conv3,conv4])
Or with Sequential() and a lambda:
def getconvmodel(filter_length,nb_filter):
    """Return a layer stack: convolution, sum-reduction over axis 0, dropout.

    Args:
        filter_length: filter shape passed to ``cl.Convolution``.
        nb_filter: number of filters passed to ``cl.Convolution``.
    """
    # Sequential is taken from cntk.layers (imported as cl), like the other
    # layers here; an unbalanced parenthesis was removed from the first entry
    # and the dropout rate now matches the 0.5 of the function-based variant.
    return cl.Sequential([
        cl.Convolution(filter_length, nb_filter, activation=C.relu),
        lambda f: C.reduce_sum(f, axis=0),
        cl.Dropout(0.5)
    ])