Converting keras functional model to keras class in tensorflow 2 - tensorflow

I am trying to convert a Keras functional model into class derived from tensorflow.keras.models.Model and I'm facing 2 issues.
1. I need to multiply 2 layers using tensorflow.keras.layers.multiply, but it returns a ValueError: A merge layer should be called on a list of inputs.
2. If I remove this layer, thus working with a classical CNN, it returns a tensorflow.python.eager.core._SymbolicException: Inputs to eager execution function cannot be Keras symbolic tensors, but found [<tf.Tensor 'patch:0' shape=(None, 64, 64, 3) dtype=float32>].
I would appreciate some guidance to convert my code. I'm using Python 3.7, TensorFlow 2.0rc2 and Keras 2.3.0. The class I have defined is the following:
# TestCNN: subclassed Keras Model chaining two conv/max-pool stages, a flatten,
# two dense layers, a custom TestLayer ("alpha") and a custom TestSigmoid output.
# NOTE(review): TestLayer and TestSigmoid are not defined in this snippet, so
# their expected input/output shapes cannot be confirmed from here.
class TestCNN(Model):
"""
conv1 > conv2 > fc1 > fc2 > alpha * fc2 > Sigmoid > output
"""
def __init__(self, input_dimension, n_category,**kwargs):
"""
Instanciator
:param input_dimension: tuple of int, theoretically (patch_size x patch_size x channels)
:param n_category: int, the number of categories to classify,
:param weight_decay: float, weight decay parameter for all the kernel regularizers
:return: the Keras model
"""
# NOTE(review): the docstring lists a weight_decay parameter that the
# signature does not declare.
# The model is registered under the fixed name 'testcnn'; extra kwargs are
# forwarded to Model.__init__.
super(TestCNN, self).__init__(name='testcnn', **kwargs)
self.input_dimension = input_dimension
self.n_category = n_category
# NOTE(review): neither Conv2D call passes a kernel size — confirm this
# matches the Conv2D signature in use.
self.conv1 = Conv2D(36, activation='relu', name='conv1/relu')
self.conv1_maxpooling = MaxPooling2D((2, 2), name='conv1/maxpooling')
self.conv2 = Conv2D(48, activation='relu', name='conv2/relu')
self.conv2_maxpooling = MaxPooling2D((2, 2), name='conv2/maxpooling')
self.flatten1 = Flatten(name='flatten1')
self.fc1 = Dense(512, activation='relu', name='fc1/relu')
self.fc2 = Dense(512, activation='relu', name='fc2/relu')
self.alpha = TestLayer(layer_dim=128, name='alpha')
self.output1 = TestSigmoid(output_dimension=n_category, name='output_layer')
#tensorflow.function
# Forward pass: feeds x through the layers created in __init__.
def call(self, x):
x = self.conv1(x)
x = self.conv1_maxpooling(x)
x = self.conv2(x)
x = self.conv2_maxpooling(x)
x = self.flatten1(x)
x = self.fc1(x)
x = self.fc2(x)
# NOTE(review): alpha_output and fc2_output are never assigned in this method
# (the fc2 result is stored in x and self.alpha is never called), so this line
# looks like it would fail on an undefined name — confirm.
alpha_times_fc2 = multiply([alpha_output, fc2_output], name='alpha_times_fc2')
return self.output1(alpha_times_fc2)
# Creates a symbolic Input from self.input_dimension and runs call() on it.
def build(self, **kwargs):
inputs = Input(shape=self.input_dimension, dtype='float32', name='patch')
outputs = self.call(inputs)
# NOTE(review): this invokes Model.__init__ a second time on the already
# constructed instance, now with inputs/outputs and a different name
# ("TestCNN" vs 'testcnn') — verify that this is supported.
super(TestCNN, self).__init__(name="TestCNN", inputs=inputs, outputs=outputs, **kwargs)
Then, in my main loop, I'm creating the instance as following:
# Instantiate the subclassed model; input_dimension, training_set and parameter
# come from code that is not part of this snippet.
testcnn = TestCNN(input_dimension=input_dimension, n_category=training_set.category_count)
# Adam hyper-parameters are read from the nested `parameter` configuration.
optimizer = tensorflow.keras.optimizers.Adam(
lr=parameter['training']['adam']['learning_rate'],
beta_1=parameter['training']['adam']['beta1'],
beta_2=parameter['training']['adam']['beta2'])
# NOTE(review): TruePositives is listed as a class, not an instance — confirm
# this is what the consumer of metrics_list expects.
metrics_list = [tensorflow.keras.metrics.TruePositives]
loss_function = tensorflow.keras.losses.categorical_crossentropy
loss_metrics = tensorflow.keras.metrics.Mean()
# NOTE(review): optimizer, metrics_list, loss_function and loss_metrics are
# created above but not used within this snippet.
# build() here is the argument-less override defined on TestCNN.
testcnn.build()
testcnn.summary()
This code is raising the tensorflow.python.eager.core._SymbolicException. If I comment out some lines and return directly the results of the fc2 layer, I've got the ValueError.

I have commented out the build() function in my model and now call it from my main script as follows:
# With the custom build() override removed, this presumably reaches the
# framework's build(input_shape) — confirm.
testcnn.build(input_dimension)
# NOTE(review): adam_optimizer is not defined in the visible snippets (the
# earlier snippet binds the Adam instance to `optimizer`).
testcnn.compile(optimizer=adam_optimizer, loss=loss_function, metrics=metrics_list)
testcnn.summary()
The input dimension is a tuple formatted as follows:
# Four-element shape with the batch size first; this is the value passed to
# testcnn.build() above.
input_dimension = (batch_size, image_size, image_size, channels)

Related

TypeError: Expected keras.losses.Loss, found function

I want to build a TFF model for a speech recognition system. For this, I use a CNN-GRU model architecture with a CTC loss function, but I get an error when I call build_federated_averaging_process. I think it is caused by the CTCLoss function, but I can't fix it.
part of my code is:
def CTCLoss(y_true, y_pred):
    """Return the CTC batch cost of ``y_pred`` against ``y_true``.

    Every sample is assigned the full prediction length (axis 1 of
    ``y_pred``) and the full label length (axis 1 of ``y_true``).
    """
    # Compute the training-time loss value
    n_samples = tf.cast(tf.shape(y_true)[0], dtype="int64")
    pred_steps = tf.cast(tf.shape(y_pred)[1], dtype="int64")
    label_steps = tf.cast(tf.shape(y_true)[1], dtype="int64")

    # Broadcast the scalar lengths to one (batch, 1) column per tensor.
    pred_lengths = pred_steps * tf.ones(shape=(n_samples, 1), dtype="int64")
    label_lengths = label_steps * tf.ones(shape=(n_samples, 1), dtype="int64")

    return keras.backend.ctc_batch_cost(
        y_true, y_pred, pred_lengths, label_lengths
    )
def create_compiled_keras_model():
    """Model similar to DeepSpeech2.

    Topology: spectrogram input -> two Conv2D / BatchNormalization / ReLU
    stages -> two bidirectional GRU layers (dropout between them) ->
    Dense / ReLU / Dropout -> softmax over ``output_dim + 1`` units.

    Reads the module-level ``fft_length`` and ``output_dim``. No ``compile``
    call is made here; the model is returned as built.
    """
    n_freq_bins = fft_length // 2 + 1
    rnn_layers = 2
    rnn_units = 128

    # Model's input
    input_spectrogram = layers.Input((None, n_freq_bins), name="input")
    # Expand the dimension to use 2D CNN.
    x = layers.Reshape((-1, n_freq_bins, 1), name="expand_dim")(input_spectrogram)

    # Convolution layers 1 and 2: (conv name, bn name, relu name, kernel, strides)
    conv_stages = (
        ("conv_1", "conv_1_bn", "conv_1_relu", [11, 41], [2, 2]),
        ("conv_2", "conv_2_bn", "conv_2_relu", [11, 21], [1, 2]),
    )
    for conv_name, bn_name, relu_name, kernel, stride in conv_stages:
        x = layers.Conv2D(
            filters=32,
            kernel_size=kernel,
            strides=stride,
            padding="same",
            use_bias=False,
            name=conv_name,
        )(x)
        x = layers.BatchNormalization(name=bn_name)(x)
        x = layers.ReLU(name=relu_name)(x)

    # Reshape the resulted volume to feed the RNNs layers
    x = layers.Reshape((-1, x.shape[-2] * x.shape[-1]))(x)

    # RNN layers
    for i in range(1, rnn_layers + 1):
        recurrent = layers.GRU(
            units=rnn_units,
            activation="tanh",
            recurrent_activation="sigmoid",
            use_bias=True,
            return_sequences=True,
            reset_after=True,
            name=f"gru_{i}",
        )
        x = layers.Bidirectional(
            recurrent, name=f"bidirectional_{i}", merge_mode="concat"
        )(x)
        # Dropout only between recurrent layers, not after the last one.
        if i < rnn_layers:
            x = layers.Dropout(rate=0.5)(x)

    # Dense layer
    x = layers.Dense(units=rnn_units * 2, name="dense_1")(x)
    x = layers.ReLU(name="dense_1_relu")(x)
    x = layers.Dropout(rate=0.5)(x)

    # Classification layer
    output = layers.Dense(units= output_dim + 1, activation="softmax")(x)

    # Model
    return keras.Model(input_spectrogram, output, name="DeepSpeech_2")
# Factory handed to TFF: builds a fresh Keras model on every call and wraps it
# with tff.learning.from_keras_model.
def model_fn():
# We _must_ create a new model here, and _not_ capture it from an external
# scope. TFF will call this within different graph contexts.
keras_model = create_compiled_keras_model()
# NOTE(review): input_spec is given a Keras Input tensor — confirm that
# from_keras_model accepts this rather than a tensor-spec structure.
# NOTE(review): loss is the plain CTCLoss function; the TypeError quoted after
# this snippet says a keras.losses.Loss is expected.
return tff.learning.from_keras_model(
keras_model,
input_spec=layers.Input((None, fft_length // 2 + 1)),
loss=CTCLoss)
and I got error in this step :
# Builds the federated averaging process from model_fn; each client optimizer
# is a new Adam instance with learning_rate=1e-4.
iterative_process = tff.learning.build_federated_averaging_process(
model_fn,
client_optimizer_fn=lambda:keras.optimizers.Adam(learning_rate=1e-4))
TypeError: Expected keras.losses.Loss, found function.
how do I fix it?
class Customloss(tf.keras.losses.Loss):
    """CTC loss packaged as a ``tf.keras.losses.Loss`` subclass.

    Pass an instance (``Customloss()``) wherever a ``keras.losses.Loss``
    object is required instead of a bare loss function.
    """

    def __init__(self):
        super().__init__()

    def call(self, y_true, y_pred):
        """Return the CTC batch cost for one batch.

        ``call`` is the hook that ``Loss`` subclasses override; a method
        with any other name is not the one Keras dispatches to.

        Assumes ``y_true`` is indexed (batch, label_step) and ``y_pred`` is
        indexed (batch, time_step, ...) — TODO confirm against the data
        pipeline. Every sample is given the full time / label length.
        """
        batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
        input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
        label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")

        # Broadcast the scalar lengths to one (batch, 1) column each.
        input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
        label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")

        return tf.keras.backend.ctc_batch_cost(
            y_true, y_pred, input_length, label_length
        )

    def CTCLoss(self, y_true, y_pred):
        """Backward-compatible alias for :meth:`call`."""
        return self.call(y_true, y_pred)
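Try subclassing tf.keras.losses.Loss for the custom loss in TFF and pass an instance of that class as `loss`; the error message shows that a keras.losses.Loss object is expected rather than a plain function.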

How to make a Keras Dense Layer deal with 3D tensor as input for this Softmax Fully Connected Layer?

I am working on a custom problem, and i have to change the fully connected layer (Dense with softmax), My model code is something like this (with Keras Framework):
.......
# Excerpt from the question; the "...." line marks code the author left out.
batch_size = 8
# Random float32 tensor of shape [batch_size, 1024, 256].
inputs = tf.random.uniform(shape=[batch_size,1024,256],dtype=tf.dtypes.float32)
# NOTE(review): x, num_classes and base_model are not defined in this excerpt;
# the random tensor above is bound to `inputs`, not `x`.
preds = Dense(num_classes,activation='softmax')(x) #final layer with softmax activation
....
model = Model(inputs=base_model.input,outputs=preds)
So, I have to change the code of the Dense layer so that it outputs a tensor of probabilities with shape [batch_size, 1024, num_classes] without using a for loop; it needs to be efficient rather than time-consuming.
The Dense code version that i want to change:
# Dense: fully connected layer computing activation(dot(input, kernel) + bias).
# NOTE(review): the docstring below says inputs of rank > 2 are flattened before
# the dot product, but call() hands `inputs` straight to K.dot and
# compute_output_shape() only replaces the last axis with `units` — the
# flattening note looks out of date; confirm against K.dot.
class Dense(Layer):
"""Just your regular densely-connected NN layer.
`Dense` implements the operation:
`output = activation(dot(input, kernel) + bias)`
where `activation` is the element-wise activation function
passed as the `activation` argument, `kernel` is a weights matrix
created by the layer, and `bias` is a bias vector created by the layer
(only applicable if `use_bias` is `True`).
Note: if the input to the layer has a rank greater than 2, then
it is flattened prior to the initial dot product with `kernel`.
# Example
```python
# as first layer in a sequential model:
model = Sequential()
model.add(Dense(32, input_shape=(16,)))
# now the model will take as input arrays of shape (*, 16)
# and output arrays of shape (*, 32)
# after the first layer, you don't need to specify
# the size of the input anymore:
model.add(Dense(32))
```
# Arguments
units: Positive integer, dimensionality of the output space.
activation: Activation function to use
(see [activations](../activations.md)).
If you don't specify anything, no activation is applied
(ie. "linear" activation: `a(x) = x`).
use_bias: Boolean, whether the layer uses a bias vector.
kernel_initializer: Initializer for the `kernel` weights matrix
(see [initializers](../initializers.md)).
bias_initializer: Initializer for the bias vector
(see [initializers](../initializers.md)).
kernel_regularizer: Regularizer function applied to
the `kernel` weights matrix
(see [regularizer](../regularizers.md)).
bias_regularizer: Regularizer function applied to the bias vector
(see [regularizer](../regularizers.md)).
activity_regularizer: Regularizer function applied to
the output of the layer (its "activation").
(see [regularizer](../regularizers.md)).
kernel_constraint: Constraint function applied to
the `kernel` weights matrix
(see [constraints](../constraints.md)).
bias_constraint: Constraint function applied to the bias vector
(see [constraints](../constraints.md)).
# Input shape
nD tensor with shape: `(batch_size, ..., input_dim)`.
The most common situation would be
a 2D input with shape `(batch_size, input_dim)`.
# Output shape
nD tensor with shape: `(batch_size, ..., units)`.
For instance, for a 2D input with shape `(batch_size, input_dim)`,
the output would have shape `(batch_size, units)`.
"""
# Stores the hyper-parameters; activation, initializers, regularizers and
# constraints are resolved through the matching module's get() helper.
def __init__(self, units,
activation=None,
use_bias=True,
kernel_initializer='glorot_uniform',
bias_initializer='zeros',
kernel_regularizer=None,
bias_regularizer=None,
activity_regularizer=None,
kernel_constraint=None,
bias_constraint=None,
**kwargs):
# Convenience: translate input_dim=N into input_shape=(N,) when only the
# former is given.
if 'input_shape' not in kwargs and 'input_dim' in kwargs:
kwargs['input_shape'] = (kwargs.pop('input_dim'),)
super(Dense, self).__init__(**kwargs)
self.units = units
self.activation = activations.get(activation)
self.use_bias = use_bias
self.kernel_initializer = initializers.get(kernel_initializer)
self.bias_initializer = initializers.get(bias_initializer)
self.kernel_regularizer = regularizers.get(kernel_regularizer)
self.bias_regularizer = regularizers.get(bias_regularizer)
self.activity_regularizer = regularizers.get(activity_regularizer)
self.kernel_constraint = constraints.get(kernel_constraint)
self.bias_constraint = constraints.get(bias_constraint)
self.input_spec = InputSpec(min_ndim=2)
self.supports_masking = True
# Creates the (input_dim, units) kernel and, when use_bias is set, the
# (units,) bias; input_dim is the size of the last input axis.
def build(self, input_shape):
assert len(input_shape) >= 2
input_dim = input_shape[-1]
self.kernel = self.add_weight(shape=(input_dim, self.units),
initializer=self.kernel_initializer,
name='kernel',
regularizer=self.kernel_regularizer,
constraint=self.kernel_constraint)
if self.use_bias:
self.bias = self.add_weight(shape=(self.units,),
initializer=self.bias_initializer,
name='bias',
regularizer=self.bias_regularizer,
constraint=self.bias_constraint)
else:
self.bias = None
# The input spec is tightened to the last-axis size the kernel was built for.
self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim})
self.built = True
# Forward pass: dot with the kernel, then optional bias and activation.
def call(self, inputs):
output = K.dot(inputs, self.kernel)
if self.use_bias:
output = K.bias_add(output, self.bias)
if self.activation is not None:
output = self.activation(output)
return output
# Output shape equals the input shape with the last axis replaced by `units`.
def compute_output_shape(self, input_shape):
assert input_shape and len(input_shape) >= 2
assert input_shape[-1]
output_shape = list(input_shape)
output_shape[-1] = self.units
return tuple(output_shape)
# Returns the base layer config extended with this layer's serialized
# hyper-parameters.
def get_config(self):
config = {
'units': self.units,
'activation': activations.serialize(self.activation),
'use_bias': self.use_bias,
'kernel_initializer': initializers.serialize(self.kernel_initializer),
'bias_initializer': initializers.serialize(self.bias_initializer),
'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
'bias_regularizer': regularizers.serialize(self.bias_regularizer),
'activity_regularizer': regularizers.serialize(self.activity_regularizer),
'kernel_constraint': constraints.serialize(self.kernel_constraint),
'bias_constraint': constraints.serialize(self.bias_constraint)
}
base_config = super(Dense, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
There are three different ways in which this can be done (that I can think of). If you want to have a single dense layer, that maps a vector of 256 elements to a vector of num_classes elements, and apply it all across your batch of data (that is, use the same 256 x num_classes matrix of weights for every sample), then you don't need to do anything special, just use a regular Dense layer:
import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense
# batch_size is defined but not used in this snippet.
batch_size = 8
num_classes = 10
# Symbolic input with per-sample shape (1024, 256).
inp = Input(shape=(1024, 256))
# A regular Dense layer applied directly to the 3-D input.
layer = Dense(num_classes, activation='softmax')
out = layer(inp)
print(out.shape)
# (None, 1024, 10)
print(layer.count_params())
# 2570
# (2570 = 256 * 10 kernel weights + 10 biases)
Another way would be to have a single huge Dense layer that takes all 1024 * 256 values in at the same time and produces all 1024 * num_classes values at the output, that is, a layer with a matrix of weights with shape (1024 * 256) x (1024 * num_classes) (on the order of gigabytes of memory!). This is easy to do too, although it seems unlikely to be what you need:
import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.layers import Flatten, Dense, Reshape, Softmax
# batch_size is defined but not used in this snippet.
batch_size = 8
num_classes = 10
inp = Input(shape=(1024, 256))
# Collapse each sample to one vector before the dense layer.
res = Flatten()(inp)
# This takes _a lot_ of memory!
layer = Dense(1024 * num_classes, activation=None)
out_res = layer(res)
# Apply softmax after reshaping
out_preact = Reshape((-1, num_classes))(out_res)
out = Softmax()(out_preact)
print(out.shape)
# (None, 1024, 10)
print(layer.count_params())
# 2684364800
# (2684364800 = (1024 * 256) * (1024 * 10) kernel weights + 1024 * 10 biases)
Finally, you may want to have a set of 1024 weight matrices, each one applied to the corresponding position in the input, which would imply an array of weights with shape (1024, 256, num_classes). I don't think this can be done with one of the standard Keras layers (or I don't know how to) [1], but it's easy enough to write a custom layer based on Dense to do that:
import tensorflow as tf
from tensorflow.keras.layers import Dense, InputSpec
class Dense2D(Dense):
    """Dense variant with a separate weight matrix per second-to-last-axis index.

    The kernel has shape ``(input_shape[-2], input_shape[-1], units)`` and
    the optional bias ``(input_shape[-2], units)``.
    """

    def __init__(self, *args, **kwargs):
        super(Dense2D, self).__init__(*args, **kwargs)

    def build(self, input_shape):
        """Create the per-position kernel and (optionally) bias."""
        assert len(input_shape) >= 3
        n_positions = input_shape[-2]
        n_features = input_shape[-1]

        self.kernel = self.add_weight(
            shape=(n_positions, n_features, self.units),
            initializer=self.kernel_initializer,
            name='kernel',
            regularizer=self.kernel_regularizer,
            constraint=self.kernel_constraint,
        )
        self.bias = (
            self.add_weight(
                shape=(n_positions, self.units),
                initializer=self.bias_initializer,
                name='bias',
                regularizer=self.bias_regularizer,
                constraint=self.bias_constraint,
            )
            if self.use_bias
            else None
        )
        self.input_spec = InputSpec(
            min_ndim=3, axes={-2: n_positions, -1: n_features}
        )
        self.built = True

    def call(self, inputs):
        """Contract each input row with its own weight matrix."""
        # Multiply each set of weights with each input element
        projected = tf.einsum('...ij,ijk->...ik', inputs, self.kernel)
        if self.use_bias:
            projected = projected + self.bias
        if self.activation is None:
            return projected
        return self.activation(projected)

    def compute_output_shape(self, input_shape):
        """Return ``input_shape`` with the last axis replaced by ``units``."""
        assert input_shape and len(input_shape) >= 3
        assert input_shape[-1]
        *leading_dims, _ = input_shape
        return (*leading_dims, self.units)
You would then use it like this:
import tensorflow as tf
from tensorflow.keras import Input
# batch_size is defined but not used in this snippet.
batch_size = 8
num_classes = 10
inp = Input(shape=(1024, 256))
# Dense2D is the custom layer defined in the preceding snippet.
layer = Dense2D(num_classes, activation='softmax')
out = layer(inp)
print(out.shape)
# (None, 1024, 10)
print(layer.count_params())
# 2631680
# (2631680 = 1024 * 256 * 10 kernel weights + 1024 * 10 biases)
1: As today points out in the comments, you can actually use a LocallyConnected1D layer to do the same that I tried to do with my Dense2D layer. It is as simple as this:
import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.layers import LocallyConnected1D
# batch_size is defined but not used in this snippet.
batch_size = 8
num_classes = 10
inp = Input(shape=(1024, 256))
# Second positional argument (1) is presumably the kernel size — confirm
# against the LocallyConnected1D signature.
layer = LocallyConnected1D(num_classes, 1, activation='softmax')
out = layer(inp)
print(out.shape)
# (None, 1024, 10)
print(layer.count_params())
# 2631680

Gradients are None for Custom Convolution Layer

I have implemented the Basic MNIST model with Custom convolution layer as shown below. The problem is that the Gradients are always 'None' for the Custom Layer and so the learning does not happens during back propagation, as the Grad has None values.
I have debugged the outputs of the layers during forward pass and they are OK.
Here is the sample code, for simplicity I have passed image of 'Ones' and have just returned the matrix from the custom layer.
I have tried my best but could not make it work; any help is very much appreciated in advance.
following code is executable and raises the
warning
:tensorflow:Gradients do not exist for variables ['cnn/custom_conv2d/kernel:0', 'cnn/custom_conv2d/bias:0', 'cnn/custom_conv2d_1/kernel:0', 'cnn/custom_conv2d_1/bias:0', 'cnn/custom_conv2d_2/kernel:0', 'cnn/custom_conv2d_2/bias:0'] when minimizing the loss.
import numpy as np
import tensorflow as tf
from grpc.beta import interfaces
# CustomConv2D: Conv2D subclass whose call() replaces the inherited convolution
# with "unroll the input patches, then tensordot with the reshaped kernel".
# NOTE(review): prepare()/unroll() assemble their results with NumPy
# (np.append / np.reshape / np.concatenate) and read the kernel through
# self.get_weights(); the surrounding question reports that gradients for this
# layer's kernel and bias are None — presumably because these steps are not
# TensorFlow ops on the layer's variables. Confirm.
class CustomConv2D(tf.keras.layers.Conv2D):
# NOTE(review): __name__ is accepted but never used; bias_initializer defaults
# to 'glorot_uniform' here.
def __init__(self, filters,
kernel_size,
strides=(1, 1),
padding='valid',
data_format=None,
dilation_rate=(1, 1),
activation=None,
use_bias=True,
kernel_initializer='glorot_uniform',
bias_initializer='glorot_uniform',
kernel_regularizer=None,
bias_regularizer=None,
activity_regularizer=None,
kernel_constraint=None,
bias_constraint=None,
__name__ = 'CustomConv2D',
**kwargs
):
super(CustomConv2D, self).__init__(
filters=filters,
kernel_size=kernel_size,
strides=strides,
padding=padding,
data_format=data_format,
dilation_rate=dilation_rate,
activation=activation,
use_bias=use_bias,
kernel_initializer=kernel_initializer,
bias_initializer=bias_initializer,
kernel_regularizer=kernel_regularizer,
bias_regularizer=bias_regularizer,
activity_regularizer=activity_regularizer,
kernel_constraint=kernel_constraint,
bias_constraint=bias_constraint,
**kwargs )
# Forward pass: unroll the input, contract with the unrolled kernels and
# reshape to the output shape computed by prepare().
# NOTE(review): neither self.bias nor self.activation is applied to the result.
def call(self, input):
(unrolled_mat, filters, shape) = self.prepare(input)
#unrolled_mat=unrolled inputs
#filters=unrolled kernels of the lAYER
#convolution through unrolling
conv_result = tf.tensordot(unrolled_mat, filters, axes=1)
result=tf.convert_to_tensor(tf.reshape(conv_result, shape))
return result
# Returns (unrolled inputs, reshaped kernel, output shape).
# NOTE(review): range(batches) needs a concrete batch size, and only
# self.kernel_size[0] is read, so square kernels appear to be assumed.
def prepare(self, matrix):
batches,rows,cols,channels=matrix.shape
kernel_size = self.kernel_size[0]
unrolled_matrices=None
# start = timer()
for batch in range(batches):
unrolled_maps=None
for chanel in range(channels):
unrolled_map = self.unroll(batch, cols, kernel_size, matrix, rows,chanel)
if unrolled_maps is None:
unrolled_maps = unrolled_map
else:
# Per-channel patches are joined along axis 1.
unrolled_maps=np.append(unrolled_maps,unrolled_map,axis=1)
# Add a leading axis of size 1 so batches can be concatenated.
unrolled_maps = np.reshape(unrolled_maps,(-1,unrolled_maps.shape[0],unrolled_maps.shape[1]))
if unrolled_matrices is None:
unrolled_matrices = unrolled_maps
else:
unrolled_matrices = np.concatenate((unrolled_matrices, unrolled_maps))
# get_weights() returns the current weight values as arrays; kernels[0] is
# reshaped to (patch length, -1).
kernels=self.get_weights()
kernels=np.reshape(kernels[0],(unrolled_matrices[0].shape[1],-1))
# Output extent shrinks by kernel_size - 1 along rows and cols.
shp=(batches,rows-(kernel_size-1),cols-(kernel_size-1),self.filters)
matrix=unrolled_matrices
return (matrix, kernels, shp)
# Builds, for one batch item and one channel, a 2-D array with one row of
# kernel_size * kernel_size values per sliding-window position.
def unroll(self, batch, cols, kernel_size, matrix, rows, chanel):
# a=np.zeros((shape))
unrolled_feature_map = None
for x in range(0, rows - (kernel_size - 1)):
for y in range(0, (cols - (kernel_size - 1))):
temp_row = None # flattened kernal at single position
for k in range(kernel_size):
for l in range(kernel_size):
if temp_row is None:
temp_row = matrix[batch, x + k, y + l, chanel]
# print(matrix[batch, x + k, y + l])
else:
temp_row = np.append(temp_row, matrix[batch, x + k, y + l, chanel])
# print(matrix[batch, x + k, y + l])
if unrolled_feature_map is None:
unrolled_feature_map = np.reshape(temp_row,
(-1, kernel_size * kernel_size)) # first row of unrolled matrix added
else:
unrolled_feature_map = np.concatenate((unrolled_feature_map, np.reshape(temp_row,
(-1, kernel_size * kernel_size)))) # concatinate subsequent row to un_mat
# Reshape to the array's own two dimensions (shape is unchanged).
unrolled_feature_map = np.reshape(unrolled_feature_map,( unrolled_feature_map.shape[0], unrolled_feature_map.shape[1]))
# print(unrolled_feature_map.shape)
matrix=unrolled_feature_map
return matrix
# CNN: three CustomConv2D layers (two followed by max-pooling), then flatten and
# two Dense layers, plus a hand-written single training step (feedForward).
class CNN(tf.keras.Model):
def __init__(self):
super(CNN, self).__init__()
self.learning_rate = 0.001
self.momentum = 0.9
# NOTE(review): self.momentum is passed as Adam's second positional argument —
# confirm which hyper-parameter that position maps to.
self.optimizer = tf.keras.optimizers.Adam(self.learning_rate, self.momentum)
self.conv1 = CustomConv2D(filters = 6, kernel_size= 3, activation = 'relu') ## valid means no padding
self.pool1 = tf.keras.layers.MaxPool2D(pool_size=2) # default stride??-
self.conv2 = CustomConv2D(filters = 16, kernel_size = 3, activation = 'relu')
self.pool2 = tf.keras.layers.MaxPool2D(pool_size = 2)
self.conv3 = CustomConv2D(filters=120, kernel_size=3, activation='relu')
self.flatten = tf.keras.layers.Flatten()
self.fc1 = tf.keras.layers.Dense(units=82,kernel_initializer='glorot_uniform')
self.fc2 = tf.keras.layers.Dense(units=10, activation = 'softmax',kernel_initializer='glorot_uniform')
# Forward pass; the trailing comments are the author's shape notes.
def call(self, x):
x = self.conv1(x) # shap(32,26,26,6) all (6s 3s 6s 3s)
x = self.pool1(x) # shap(32,13,13,6) all (6s)
x = self.conv2(x) # shap(32,11,11,16) all(324s)
x = self.pool2(x) # shap(32,5,5,16)
x = self.conv3(x) # shap(32,3,3,120)all(46656)
x = self.flatten(x) # shap(32,1080)
x = self.fc1(x) # shap(32,82)
x = self.fc2(x) # shap(32,10)
return x
# One training step: forward pass under a GradientTape, loss, gradient
# computation and optimizer update. Nothing is returned.
def feedForward(self, image, label):
accuracy_object = tf.metrics.Accuracy()
# NOTE(review): the loss is created with from_logits=True while fc2 applies a
# softmax activation — confirm which of the two is intended.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
with tf.GradientTape() as tape:
feedForwardCompuation = self(image, training=True)
self.loss_value = loss_object(label, feedForwardCompuation)
# Gradients are requested for every entry of self.variables.
grads = tape.gradient(self.loss_value, self.variables)
self.optimizer.apply_gradients(zip(grads, self.variables))
# NOTE(review): accuracy is computed but neither stored nor returned.
accuracy = accuracy_object(tf.argmax(feedForwardCompuation, axis=1, output_type=tf.int32), label)
# Load MNIST and run a single training step on the first training sample.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train=x_train.astype('float32')
# NOTE(review): the labels are cast to float32 as well, while feedForward
# compares them with an int32 argmax — confirm the intended label dtype.
y_train = y_train.astype('float32')
# Reshape one image to (1, 28, 28, 1): batch of one, single channel.
image=x_train[0].reshape((1,28,28,1))
label=y_train[0]
cnn=CNN()
cnn.feedForward(image,label)
UPDATE: I am not using the built-in TF conv function; rather, I am implementing my own custom convolution operation via the matrix-unrolling method (unrolled map * unrolled filters). However, tape.gradient returns "None" for the custom layers, whereas when I use the built-in conv2d function of TF it works fine!
I have Added the actual code of the operation
Snapshot of grads while debugging
Problem is that the Convolution Operation is not happening in the Class, CustomConv2D. Neither the call Method, nor the customConv Method is performing Convolution Operation, but it is just returning the Input, as it is.
Replacing the line, return self.customConv(matrix) in the call method of CustomConv2D Class with return super(tf.keras.layers.Conv2D, self).call(matrix) will perform the actual Convolutional Operation.
One more change is to invoke the call method of CNN class by including the line, _ = cnn(X_reshaped) before the line, cnn.feedForward(image,label)
By doing the above 2 changes, Gradients will be added.

How to replace (or insert) intermediate layer in Keras model?

I have a trained Keras model and I would like:
1) to replace a Conv2D layer with the same layer but without bias.
2) to add BatchNormalization layer before first Activation
How can I do this?
def keras_simple_model():
    """Build a small CNN for 28x28x1 inputs with a 10-way softmax output.

    Two blocks of two 3x3 same-padded convolutions (4 then 8 filters), each
    convolution followed by a ReLU and each block by 2x2 max-pooling, then
    global average pooling, a 10-unit Dense layer and a softmax activation.
    """
    from keras.models import Model
    from keras.layers import Input, Dense, GlobalAveragePooling2D
    from keras.layers import Conv2D, MaxPooling2D, Activation

    def conv_relu(tensor, filters, name):
        # 3x3 'same' convolution without built-in activation, then ReLU.
        tensor = Conv2D(filters, (3, 3), activation=None, padding='same', name=name)(tensor)
        return Activation('relu')(tensor)

    def pool(tensor, name):
        # 2x2 max-pooling with stride 2.
        return MaxPooling2D((2, 2), strides=(2, 2), name=name)(tensor)

    inputs1 = Input((28, 28, 1))

    features = conv_relu(inputs1, 4, 'conv1')
    features = conv_relu(features, 4, 'conv2')
    features = pool(features, 'pool1')

    features = conv_relu(features, 8, 'conv3')
    features = conv_relu(features, 8, 'conv4')
    features = pool(features, 'pool2')

    features = GlobalAveragePooling2D()(features)
    logits = Dense(10, activation=None)(features)
    probabilities = Activation('softmax')(logits)

    return Model(inputs=inputs1, outputs=probabilities)
if __name__ == '__main__':
    model = keras_simple_model()
    # summary() prints the table itself; wrapping it in print() would also
    # print its return value.
    model.summary()
The following function allows you to insert a new layer before, after or to replace each layer in the original model whose name matches a regular expression, including non-sequential models such as DenseNet or ResNet.
import re
from keras.models import Model
def insert_layer_nonseq(model, layer_regex, insert_layer_factory,
                        insert_layer_name=None, position='after'):
    """Rebuild ``model`` with a new layer placed around every matching layer.

    The graph is parsed first (which layers feed which), then replayed layer
    by layer, so non-sequential topologies are handled.

    Args:
        model: Keras functional model to rebuild.
        layer_regex: pattern given to ``re.match`` against each layer name.
        insert_layer_factory: zero-argument callable returning a fresh layer;
            called once per matching layer.
        insert_layer_name: name for the inserted layer. When falsy, the name
            ``'<matched layer name>_<new layer name>'`` is used.
        position: ``'before'``, ``'after'`` or ``'replace'``.

    Returns:
        A new ``Model`` built on ``model.inputs``.

    Raises:
        ValueError: if ``position`` is not one of the supported values.
    """
    if position not in ('before', 'after', 'replace'):
        raise ValueError('position must be: before, after or replace')

    # Auxiliary dictionary to describe the network graph
    network_dict = {'input_layers_of': {}, 'new_output_tensor_of': {}}

    # Set the input layers of each layer
    for layer in model.layers:
        for node in layer._outbound_nodes:
            consumer_name = node.outbound_layer.name
            network_dict['input_layers_of'].setdefault(
                consumer_name, []).append(layer.name)

    # Set the output tensor of the input layer
    network_dict['new_output_tensor_of'][model.layers[0].name] = model.input

    # Iterate over all layers after the input
    model_outputs = []
    for layer in model.layers[1:]:
        # Determine input tensors
        layer_input = [network_dict['new_output_tensor_of'][layer_aux]
                       for layer_aux in network_dict['input_layers_of'][layer.name]]
        if len(layer_input) == 1:
            layer_input = layer_input[0]

        # Insert layer if name matches the regular expression
        if re.match(layer_regex, layer.name):
            if position == 'after':
                x = layer(layer_input)
            else:
                # 'before' and 'replace' both feed the new layer with the
                # original layer's input.
                x = layer_input

            new_layer = insert_layer_factory()
            if insert_layer_name:
                new_layer.name = insert_layer_name
            else:
                new_layer.name = '{}_{}'.format(layer.name,
                                                new_layer.name)
            x = new_layer(x)
            print('New layer: {} Old layer: {} Type: {}'.format(new_layer.name,
                                                            layer.name, position))
            if position == 'before':
                x = layer(x)
        else:
            x = layer(layer_input)

        # Set new output tensor (the original one, or the one of the inserted
        # layer)
        network_dict['new_output_tensor_of'][layer.name] = x

        # Save tensor in output list if it is output in initial model.
        # The current layer's name is checked here, not a name left over
        # from the graph-parsing loop.
        if layer.name in model.output_names:
            model_outputs.append(x)

    return Model(inputs=model.inputs, outputs=model_outputs)
The difference with respect to the simpler case of a purely sequential model is that before iterating over the layers to find the key layer, you first parse the graph and store the input layers of each layer in an auxiliary dictionary. Then, as you iterate over the layers, you also store the new output tensor of each layer, which is used to determine the input layers of each layer, when building the new model.
A use case would be the following, where a Dropout layer is inserted after each activation layer of ResNet50:
from keras.applications.resnet50 import ResNet50
# Dropout is used by the factory below and must be imported as well.
from keras.layers import Dropout
from keras.models import load_model

model = ResNet50()


def dropout_layer_factory():
    """Return a fresh Dropout layer; called once per insertion point."""
    return Dropout(rate=0.2, name='dropout')


model = insert_layer_nonseq(model, '.*activation.*', dropout_layer_factory)
# Fix possible problems with new model
model.save('temp.h5')
model = load_model('temp.h5')
model.summary()
You can use the following functions:
from keras.models import Model
def replace_intermediate_layer_in_keras(model, layer_id, new_layer):
    """Return a new sequentially rebuilt model with one layer swapped out.

    The layers of ``model`` are re-applied in order starting from the output
    of layer 0; the layer at index ``layer_id`` is skipped and ``new_layer``
    is applied in its place. Only suitable for purely sequential topologies.

    Args:
        model: source Keras model.
        layer_id: index into ``model.layers`` of the layer to replace.
        new_layer: layer applied instead of ``model.layers[layer_id]``.

    Returns:
        A new ``Model`` from the first layer's input to the rebuilt output.
    """
    layers = list(model.layers)
    x = layers[0].output
    for index, layer in enumerate(layers[1:], start=1):
        x = new_layer(x) if index == layer_id else layer(x)
    # `inputs` / `outputs` are the keyword names used by the functional
    # Model constructor elsewhere in this file.
    return Model(inputs=layers[0].input, outputs=x)
def insert_intermediate_layer_in_keras(model, layer_id, new_layer):
    """Return a new sequentially rebuilt model with one extra layer inserted.

    The layers of ``model`` are re-applied in order starting from the output
    of layer 0; ``new_layer`` is applied immediately before the layer at
    index ``layer_id``. Only suitable for purely sequential topologies.

    Args:
        model: source Keras model.
        layer_id: index into ``model.layers`` before which to insert.
        new_layer: layer to insert.

    Returns:
        A new ``Model`` from the first layer's input to the rebuilt output.
    """
    layers = list(model.layers)
    x = layers[0].output
    for index, layer in enumerate(layers[1:], start=1):
        if index == layer_id:
            x = new_layer(x)
        x = layer(x)
    # `inputs` / `outputs` are the keyword names used by the functional
    # Model constructor elsewhere in this file.
    return Model(inputs=layers[0].input, outputs=x)
Example:
from keras.layers import Conv2D, BatchNormalization
model = keras_simple_model()
# summary() prints the table itself; wrapping it in print() would also print
# its return value.
model.summary()

# Swap the layer at index 3 for an equivalent convolution without bias.
model = replace_intermediate_layer_in_keras(
    model, 3,
    Conv2D(
        4, (3, 3),
        activation=None,
        padding='same',
        name='conv2_repl',
        use_bias=False
    )
)
model.summary()

# Insert batch normalization before the layer at index 4.
model = insert_intermediate_layer_in_keras(
    model, 4, BatchNormalization()
)
model.summary()
There are some limitations on replacements due to layer shapes etc.
This was how i did it:
import keras
from keras.models import Model
from tqdm import tqdm
from keras import backend as K
def make_list(X):
    """Return ``X`` unchanged if it is a list, otherwise wrap it in a one-element list."""
    return X if isinstance(X, list) else [X]
def list_no_list(X):
    """Unwrap a single-element sequence; return anything else unchanged."""
    return X[0] if len(X) == 1 else X
def replace_layer(model, replace_layer_subname, replacement_fn,
                  **kwargs):
    """
    Rebuild ``model``, swapping every layer whose name contains a substring.

    args:
        model :: keras.models.Model instance
        replace_layer_subname :: str -- if str in layer name, replace it
        replacement_fn :: fn called as replacement_fn(old_layer=layer, **kwargs);
            it must return a callable that is then applied to the layer input
            > the callable's output must be usable wherever the replaced
              layer's output was used
    returns:
        new model with replaced layers
    quick examples:
        want to just remove all layers with 'batch_norm' in the name:
            > new_model = replace_layer(model, 'batch_norm', lambda **kwargs: (lambda u: u))
        want to replace all Conv1D(N, m, padding='same') with an LSTM (lets say all have 'conv1d' in name)
            > new_model = replace_layer(model, 'conv1d', lambda old_layer, **kwargs: LSTM(units=old_layer.filters, return_sequences=True))
    """
    rebuilt_inputs = []
    rebuilt_outputs = []
    # Maps original tensor names to their counterparts in the rebuilt graph.
    tensor_by_name = {}
    final_output_names = [out.name for out in make_list(model.output)]

    for layer in model.layers:
        ### Loop if layer is used multiple times
        for node_index in range(len(layer._inbound_nodes)):
            ### check layer inp/outp
            source_names = [t.name for t in make_list(layer.get_input_at(node_index))]
            target_names = [t.name for t in make_list(layer.get_output_at(node_index))]

            ### setup model inputs
            if 'input' in layer.name:
                for input_tensor in make_list(layer.get_output_at(node_index)):
                    rebuilt_inputs.append(input_tensor)
                    tensor_by_name[input_tensor.name] = input_tensor
                continue

            ### setup layer inputs
            upstream = list_no_list([tensor_by_name[name] for name in source_names])

            ### remake layer
            if replace_layer_subname in layer.name:
                print('replacing '+layer.name)
                produced = replacement_fn(old_layer=layer, **kwargs)(upstream)
            else:
                produced = layer(upstream)

            ### reinstantialize outputs into dict
            for name, new_tensor in zip(target_names, make_list(produced)):
                ### check if is an output
                if name in final_output_names:
                    rebuilt_outputs.append(new_tensor)
                tensor_by_name[name] = new_tensor

    return Model(rebuilt_inputs, rebuilt_outputs)
I have a custom layer (taken from someone online) called BatchNormalizationFreeze, so an example of usage is this:
# replace_layer() calls the factory as replacement_fn(old_layer=..., **kwargs)
# and then applies the returned object to the layer input, so the factory
# returns the layer itself rather than the result of calling it.
new_model = replace_layer(model, 'batch_normal', lambda **kwargs: BatchNormalizationFreeze())
If you're going to replace several layers, just swap the replacement function for a pseudo-model that applies them all at once.
Unfortunately replacing a layer is no small feat for models that do not follow the sequential pattern. For sequential patterns it is OK to just x = layer(x) and replace with new_layer when you see fit as in the previous answer.
However, for models that do not have a classic sequential pattern (say you have a simple "concatenation" of two columns) you have to actually "parse" the graph and use your "new_layer" (or layers) in the right places. Hope this is not too discouraging and happy graph parsing and reconstructing :)

TypeError: Input tensors to a Model must be Keras tensors. Found: Tensor("Placeholder_3:0", dtype=float32) (missing Keras metadata)

my input variables
# Volume geometry and class count used by the network defined further down.
IMG_SIZE_PX=50
SLICE_COUNT=20
n_classes=2
# Graph placeholders; no shape is given for either.
x=tf.placeholder('float')
y=tf.placeholder('float')
# keep_rate is the value passed to tf.nn.dropout in the network.
keep_rate=0.8
# NOTE(review): keep_prob is created here but not used in the visible code.
keep_prob=tf.placeholder(tf.float32)
my convolution 3d function
def conv3d(x, W):
    """Convolve ``x`` with filter ``W`` using unit strides and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1, 1]
    return tf.nn.conv3d(x, W, strides=unit_strides, padding='SAME')
my maxpooling 3d function
def maxpool3d(x):
    """Max-pool ``x`` with a [1, 2, 2, 2, 1] window and matching strides, 'SAME' padding."""
    window = [1, 2, 2, 2, 1]
    step = [1, 2, 2, 2, 1]
    return tf.nn.max_pool3d(x, ksize=window, strides=step, padding='SAME')
this is my network
def convolutional_neural_network(x):
    """Build the 3-D CNN graph and return its un-activated output tensor.

    Two conv3d + ReLU + maxpool3d stages, one fully connected ReLU layer with
    dropout, and a linear output layer with ``n_classes`` units. Reads the
    module-level IMG_SIZE_PX, SLICE_COUNT, n_classes and keep_rate.
    """
    # my network weights
    weights = {
        'W_conv1': tf.Variable(tf.random_normal([3, 3, 3, 1, 32])),
        'W_conv2': tf.Variable(tf.random_normal([3, 3, 3, 32, 64])),
        # here 54080 is the input tensor value
        'W_fc': tf.Variable(tf.random_normal([54080, 1024])),
        'out': tf.Variable(tf.random_normal([1024, n_classes])),
    }
    # my network biases
    biases = {
        'b_conv1': tf.Variable(tf.random_normal([32])),
        'b_conv2': tf.Variable(tf.random_normal([64])),
        'b_fc': tf.Variable(tf.random_normal([1024])),
        'out': tf.Variable(tf.random_normal([n_classes])),
    }

    # here is my input x
    volume = tf.reshape(x, shape=[-1, IMG_SIZE_PX, IMG_SIZE_PX, SLICE_COUNT, 1])

    # my 2 hidden layers(convolution+maxpooling)
    hidden = volume
    for w_key, b_key in (('W_conv1', 'b_conv1'), ('W_conv2', 'b_conv2')):
        activated = tf.nn.relu(conv3d(hidden, weights[w_key]) + biases[b_key])
        hidden = maxpool3d(activated)

    # my fully connected layer
    dense = tf.reshape(hidden, [-1, 54080])
    dense = tf.nn.relu(tf.matmul(dense, weights['W_fc']) + biases['b_fc'])
    dense = tf.nn.dropout(dense, keep_rate)

    # my output layer
    return tf.matmul(dense, weights['out']) + biases['out']
my input numpy arrays
# Load the pre-processed array from a fixed local path.
much_data = np.load('D:/muchdata-50-50-20.npy')
# NOTE(review): training uses the LAST 10 entries and validation the last 2, so
# the validation entries are also part of the training slice — confirm whether
# [:-10] / [-2:] or similar was intended.
train_data = much_data[-10:]
validation_data = much_data[-2:]
finally training my network
# Builds the network on x, wraps it in a Keras Model and compiles it; no fit
# call is made in the visible code.
def train_neural_network(x):
outl = convolutional_neural_network(x)#don't know this is my output
layer
# NOTE(review): x is the raw tf.placeholder defined earlier and outl is built
# from plain tf ops; the error in this question's title says Model inputs must
# be Keras tensors.
model=Model(input=x, output=outl)
model.compile(optimizer='rmsprop',loss='categorical_crossentropy',metrics=
['accuracy'])
train_neural_network(x)#train the net
My error is this: Keras metadata is missing.
any help can be appreciated