TensorFlow gives a warning "Gradients do not exist for variables"

Sorry, I'm a beginner to TensorFlow. When I tried to write a custom layer, I got the warning 'Gradients do not exist for variables' after calling model.fit().
To illustrate my issue, here is my code:
class gen_noise_sig(layers.Layer):
    def __init__(self):
        super(gen_noise_sig, self).__init__()
        self.m = self.add_weight('m', [1], trainable=True).numpy()
        self.sig = self.add_weight('sig', [1], trainable=True).numpy()

    def get_config(self):
        config = super().get_config()
        config.update({
            "m": self.m,
            "sig": self.sig,
        })
        return config

    def call(self, inputs, training=True):
        out = inputs + tf.random.normal(tf.shape(inputs), mean=self.m, stddev=self.sig)
        out = tf.expand_dims(out, axis=-1)
        return out
def AutoEncoder():
    model = tf.keras.Sequential()
    model.add(gen_noise_sig())
    model.add(layers.Conv1D(8, strides=5, kernel_size=128, padding='same', activation='relu', name='encoder_input'))
    model.add(layers.Conv1D(16, strides=5, kernel_size=32, padding='same', activation='leaky_relu'))
    model.add(layers.Conv1D(32, strides=5, kernel_size=16, padding='same'))
    model.add(layers.Conv1D(128, strides=4, kernel_size=4, padding='same'))
    model.add(layers.Conv1D(128, strides=2, kernel_size=2, padding='same', name='encoder_output'))
    model.add(layers.Conv1DTranspose(128, strides=2, kernel_size=4, padding='same', name='decoder_input', activation='leaky_relu'))
    model.add(layers.Conv1DTranspose(32, strides=4, kernel_size=4, padding='same'))
    model.add(layers.Conv1DTranspose(16, strides=5, kernel_size=64, padding='same'))
    model.add(layers.Conv1DTranspose(8, strides=5, kernel_size=64, padding='same'))
    model.add(layers.Conv1DTranspose(1, strides=5, kernel_size=64, padding='same', name='decoder_output'))
    model.add(layers.Flatten())
    return model
model = AutoEncoder()
model.build(input_shape=(None, 10000))
model.compile(optimizer='adam',
              loss='mae',
              metrics=['mae'])
# print(model.trainable_weights)

filepath = 'AutoEncoder/Denoising/Damage/CNN_AE_model1020.h5'
checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')

history = model.fit(targetData,
                    targetData,
                    batch_size=200,
                    epochs=1000,
                    validation_split=0.1,
                    callbacks=[checkpoint])
Can someone tell me why the gradients do not exist and how to solve this problem, please?
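A likely cause, for reference: calling .numpy() on the value returned by add_weight stores plain NumPy arrays in self.m and self.sig, so the layer's weights never appear in the computation graph and therefore receive no gradients. Below is a minimal sketch of the layer with the .numpy() calls removed (an assumption about the intended behaviour, not tested against the original data):

class gen_noise_sig(layers.Layer):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Keep the tf.Variable objects returned by add_weight so gradients can reach them.
        self.m = self.add_weight(name='m', shape=[1], trainable=True)
        self.sig = self.add_weight(name='sig', shape=[1], trainable=True)

    def call(self, inputs, training=True):
        # mean and stddev are tensors here, so the generated noise is differentiable w.r.t. them.
        out = inputs + tf.random.normal(tf.shape(inputs), mean=self.m, stddev=self.sig)
        return tf.expand_dims(out, axis=-1)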

Related

Multi-input TF model using TFRecord datasets

I'm trying to create a multi-input, single-output model in TensorFlow.
I load the data from TFRecs using the get_batched_dataset fn.
def get_batched_dataset(filenames, batch_size):
    dataset = (
        tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTO)
        .map(parse_tfrecord_fn, num_parallel_calls=AUTO)
        .map(prepare_sample, num_parallel_calls=AUTO)
        .batch(batch_size)
    )
    return dataset
In the above fn, I do some preprocessing with the loaded data from TFRecs using the prepare_sample fn.
def prepare_sample(features):
    image = features['image']
    w = tf.shape(image)[0]
    h = tf.shape(image)[1]
    # some type of preprocessing/data augmentation/transforms
    x = {'l_eye': l_eye, 'r_eye': r_eye, 'kps': kps}  # l_eye & r_eye are images, kps is numerical data
    y = out
    return x, y
Below is a very small version of how I’m trying to code my model architecture, just to get an idea.
class cnn_model(layers.Layer):
    def __init__(self, name='cnn-model'):
        super(cnn_model, self).__init__()
        self.conv1 = layers.Conv2D(32, kernel_size=7, strides=2, padding='valid')
        self.conv2 = layers.Conv2D(64, kernel_size=5, strides=2, padding='valid')
        self.conv3 = layers.Conv2D(128, kernel_size=3, strides=1, padding='valid')
        self.bn1 = layers.BatchNormalization(axis=-1, momentum=0.9)
        self.bn2 = layers.BatchNormalization(axis=-1, momentum=0.9)
        self.bn3 = layers.BatchNormalization(axis=-1, momentum=0.9)
        self.leakyrelu = layers.LeakyReLU(alpha=0.01)
        self.avgpool = layers.AveragePooling2D(pool_size=2)
        self.dropout = layers.Dropout(rate=0.02)

    def call(self, input_image):
        x = self.conv1(input_image)
        x = self.bn1(x)
        x = self.leakyrelu(x)
        x = self.avgpool(x)
        x = self.dropout(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.leakyrelu(x)
        x = self.avgpool(x)
        x = self.dropout(x)
        x = self.conv3(x)
        x = self.bn3(x)
        x = self.leakyrelu(x)
        x = self.avgpool(x)
        x = self.dropout(x)
        return x

class num_model(layers.Layer):
    def __init__(self, name='num-model'):
        super(num_model, self).__init__()
        self.dense1 = layers.Dense(128)
        self.dense2 = layers.Dense(16)

    def call(self, input_keypoints):
        x = self.dense1(input_keypoints)
        x = self.dense2(x)
        return x

class main_model(Model):
    def __init__(self, name='main-model'):
        super(main_model, self).__init__()
        self.cnn_model = cnn_model()
        self.num_model = num_model()
        self.dense1 = layers.Dense(8)
        self.dense2 = layers.Dense(2)

    def call(self, input_l_r_kps):
        leftEye, rightEye, kps = input_l_r_kps['l_eye'], input_l_r_kps['r_eye'], input_l_r_lms['kps']
        l_eye_feat = tf.reshape(self.cnn_model(leftEye), (1, 3*128*128))
        r_eye_feat = tf.reshape(self.cnn_model(rightEye), (1, 3*128*128))
        kp_feat = self.num_model(kps)
        combined_feat = tf.concat((l_eye_feat, r_eye_feat, lm_feat), 1)
        x = self.dense1(combined_feat)
        x = self.dense2(x)
        return x
Now, the dataset returned by the get_batched_dataset fn is what I’ll be feeding into the Keras model.fit method.
train_dataset = get_batched_dataset('train.tfrec', batch_size)
valid_dataset = get_batched_dataset('valid.tfrec', batch_size)

model.fit(
    x=train_dataset,
    batch_size=batch_size,
    epochs=1,
    validation_data=valid_dataset,
    use_multiprocessing=False
)
Can you please guide me where I’m going wrong? Is it in the prepare_sample fn by returning x as a dict, or somewhere in the model code? I’m really new to TF and confused.
Any help appreciated!
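For context on the dict-valued x: a Keras model can consume a tf.data pipeline that yields ({'l_eye': ..., 'r_eye': ..., 'kps': ...}, y) as long as its Input layers (or dict of inputs) are named after the same keys. A minimal functional-API sketch of that wiring follows; the shapes and layer sizes here are placeholders, not taken from the question:

import tensorflow as tf
from tensorflow.keras import layers, Model

# Hypothetical shapes; the real ones depend on what prepare_sample emits.
l_eye_in = layers.Input(shape=(128, 128, 3), name='l_eye')
r_eye_in = layers.Input(shape=(128, 128, 3), name='r_eye')
kps_in = layers.Input(shape=(8,), name='kps')

l_feat = layers.Flatten()(layers.Conv2D(8, 3)(l_eye_in))
r_feat = layers.Flatten()(layers.Conv2D(8, 3)(r_eye_in))
k_feat = layers.Dense(16)(kps_in)

out = layers.Dense(2)(layers.Concatenate()([l_feat, r_feat, k_feat]))

# The keys of the inputs dict match the keys returned by prepare_sample.
model = Model(inputs={'l_eye': l_eye_in, 'r_eye': r_eye_in, 'kps': kps_in}, outputs=out)
model.compile(optimizer='adam', loss='mse')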

How to build a Siamese network from a Transformer model? Input shape error

I have the following base network with some important parameters (the error is coming from these; please assume all the other parameters):
maxlen = 250
model_dense = 256
Base model:
def build_base_model(inputs):
    inputs = layers.Input(shape=(maxlen,), name='base_input')
    embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
    x = embedding_layer(inputs)
    transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim, trans_drop1, trans_drop2, trans_reg1, trans_reg2)
    x = transformer_block(x)
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dropout(model_drop1)(x)
    outputs = layers.Dense(model_dense)(x)
    base_model = keras.Model(inputs=inputs, outputs=outputs)
    return base_model
and my Siamese network is:
base_model = build_base_model()
input_text1 = layers.Input(shape=(maxlen,))
input_text2 = layers.Input(shape=(maxlen,))
emb1 = base_model(input_text1)
emb2 = base_model(input_text2)
distance = layers.Lambda(euclidean_distance)([emb1, emb2])
outputs = layers.Dense(1, activation="sigmoid")(distance)
model = keras.Model(inputs=[emb1, emb2], outputs=outputs)
model.compile(
    optimizer="adam", metrics=["accuracy"], loss='binary_crossentropy')
history = model.fit(
    train_X, train_y, batch_size=batch_size, epochs=50, validation_split=0.15, callbacks=callbacks, verbose=1,
)
It gives me the following error:
ValueError: Input 0 of layer "model_11" is incompatible with the layer: expected shape=(None, 256), found shape=(None, 250)
What am I doing wrong?
Base Transformer model tutorial taken from this
Siamese Model Structure, cosine distance, make_pairs from this
UPDATE: I have built the new network in a different manner and it is up and running. Can someone please confirm whether it is correct?
inputs1 = layers.Input(shape=(maxlen,), name='inp_1')
inputs2 = layers.Input(shape=(maxlen,), name='inp_2')
embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim, trans_drop1, trans_drop2, trans_reg1, trans_reg2)
pooling = layers.GlobalAveragePooling1D()
drop_layer = layers.Dropout(model_drop1)
out_dense = layers.Dense(model_dense)
x1 = embedding_layer(inputs1)
x2 = embedding_layer(inputs2)
x1 = transformer_block(x1)
x2 = transformer_block(x2)
x1 = pooling(x1)
x2 = pooling(x2)
x1 = drop_layer(x1)
x2 = drop_layer(x2)
vec_x1 = out_dense(x1)
vec_x2 = out_dense(x2)
distance = layers.Lambda(euclidean_distance)([vec_x1, vec_x2])
outputs = layers.Dense(1, activation="sigmoid")(distance)
model = keras.Model(inputs=[inputs1, inputs2], outputs=outputs)
In the line model = keras.Model(inputs=[emb1, emb2], outputs=outputs):
I suspect that you meant to say model = keras.Model(inputs=[input_text1, input_text2], outputs=outputs).
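For reference, a short sketch of the Siamese head with that correction applied (it assumes the question's build_base_model and euclidean_distance definitions are in scope):

base_model = build_base_model()
input_text1 = layers.Input(shape=(maxlen,), name='text_1')
input_text2 = layers.Input(shape=(maxlen,), name='text_2')

# The shared base model encodes both texts.
emb1 = base_model(input_text1)
emb2 = base_model(input_text2)

distance = layers.Lambda(euclidean_distance)([emb1, emb2])
outputs = layers.Dense(1, activation="sigmoid")(distance)

# The Model is built from the raw Input tensors, not from the embedding tensors.
model = keras.Model(inputs=[input_text1, input_text2], outputs=outputs)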

Input shape is unknown during training (model subclassing)

Consider the following model:
class FractalNeuralNetwork(tf.keras.Model):
    def __init__(self, class_number):
        super(FractalNeuralNetwork, self).__init__()
        self.box_counting_patches = [BoxCountingPatch(box_size) for box_size in range(3, 41 + 1, 2)]
        self.chebyshev = ChebyshevBinaryPatch()
        self.euclidean = EuclideanBinaryPatch()
        self.manhattan = ManhattanBinaryPatch()
        self.percolation_c = PercolationC()
        self.percolation_m = PercolationM()
        self.percolation_q = PercolationQ()
        self.probability = ProbabilityMatrix()
        self.fractal_dimension = FractalDimension()
        self.lacunarity = Lacunarity()
        self.assemble = AssembleFractalImage()
        self.resize = tf.keras.layers.Resizing(width=224, height=224)
        self.rescale = tf.keras.layers.Rescaling(scale=1./255)
        self.mobilenet_v2 = hub.KerasLayer("https://tfhub.dev/google/tf2-preview/mobilenet_v2/feature_vector/4",
                                           output_shape=[1280],
                                           trainable=False)
        self.combine = tf.keras.layers.Add()
        self.score = tf.keras.layers.Dense(class_number, activation='softmax')

    def call(self, inputs):
        inputs = tf.ensure_shape(inputs, self.ensure_input_shape)
        box_counting_patches = [box_counting_patch(inputs) for box_counting_patch in self.box_counting_patches]
        chebyshev = self.chebyshev(inputs=box_counting_patches)
        euclidean = self.euclidean(inputs=box_counting_patches)
        manhattan = self.manhattan(inputs=box_counting_patches)
        percolation_c = self.percolation_c(inputs=[chebyshev, euclidean, manhattan])
        percolation_m = self.percolation_m(inputs=[chebyshev, euclidean, manhattan])
        percolation_q = self.percolation_q(inputs=[chebyshev, euclidean, manhattan])
        probability = self.probability(inputs=[chebyshev, euclidean, manhattan])
        fractal_dimension = self.fractal_dimension(inputs=probability)
        lacunarity = self.lacunarity(inputs=probability)
        fractal_output = self.assemble(
            inputs=[
                fractal_dimension,
                lacunarity,
                percolation_c,
                percolation_m,
                percolation_q
            ]
        )
        fractal_output = self.resize(fractal_output)
        fractal_output = self.rescale(fractal_output)
        fractal_output = self.mobilenet_v2(fractal_output)
        original_output = self.rescale(inputs)
        original_output = self.mobilenet_v2(original_output)
        combined_output = self.combine([fractal_output, original_output])
        output = self.score(combined_output)
        return output
None of the custom layers here are trainable; they just perform calculations that extract fractal features from images.
The model is trained with the following code:
model = FractalNeuralNetwork(
    class_number=CLASS_NUMBER
)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(
    training_set,
    validation_data=validation_set,
    epochs=1
)
The first batch comes with a normal shape, (None, 224, 224, 3), but the second comes as (None, None, None, None), and it breaks my model. Why does this happen?
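If the dataset itself is the source of the (None, None, None, None) signature, one common workaround is to assert the known static shape inside the input pipeline, so every retrace of call() sees a fully defined shape. A minimal sketch, assuming the elements really are batched 224x224 RGB images with labels:

def set_static_shape(images, labels):
    # Attach the known static shape so Keras traces call() with a defined signature.
    images = tf.ensure_shape(images, [None, 224, 224, 3])
    return images, labels

training_set = training_set.map(set_static_shape)
validation_set = validation_set.map(set_static_shape)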

How to implement U-Net for a custom dataset using TensorFlow and Keras

I want to implement U-Net for semantic segmentation on my own dataset, which contains two classes. Can anyone please let me know how I can implement it with TensorFlow and Keras?
I have two classes in my dataset with their corresponding labels.
This is a U-Net implementation for binary classification/segmentation.
def UNET_224(dropout_val=0.0, weights=None):  # No dropout by default
    if K.image_dim_ordering() == 'th':
        inputs = Input((INPUT_CHANNELS, 224, 224))
        axis = 1
    else:
        inputs = Input((224, 224, INPUT_CHANNELS))
        axis = 3
    filters = 32

    conv_224 = double_conv_layer(inputs, filters)
    pool_112 = MaxPooling2D(pool_size=(2, 2))(conv_224)
    conv_112 = double_conv_layer(pool_112, 2*filters)
    pool_56 = MaxPooling2D(pool_size=(2, 2))(conv_112)
    conv_56 = double_conv_layer(pool_56, 4*filters)
    pool_28 = MaxPooling2D(pool_size=(2, 2))(conv_56)
    conv_28 = double_conv_layer(pool_28, 8*filters)
    pool_14 = MaxPooling2D(pool_size=(2, 2))(conv_28)
    conv_14 = double_conv_layer(pool_14, 16*filters)
    pool_7 = MaxPooling2D(pool_size=(2, 2))(conv_14)
    conv_7 = double_conv_layer(pool_7, 32*filters)

    up_14 = concatenate([UpSampling2D(size=(2, 2))(conv_7), conv_14], axis=axis)
    up_conv_14 = double_conv_layer(up_14, 16*filters)
    up_28 = concatenate([UpSampling2D(size=(2, 2))(up_conv_14), conv_28], axis=axis)
    up_conv_28 = double_conv_layer(up_28, 8*filters)
    up_56 = concatenate([UpSampling2D(size=(2, 2))(up_conv_28), conv_56], axis=axis)
    up_conv_56 = double_conv_layer(up_56, 4*filters)
    up_112 = concatenate([UpSampling2D(size=(2, 2))(up_conv_56), conv_112], axis=axis)
    up_conv_112 = double_conv_layer(up_112, 2*filters)
    up_224 = concatenate([UpSampling2D(size=(2, 2))(up_conv_112), conv_224], axis=axis)
    up_conv_224 = double_conv_layer(up_224, filters, dropout_val)

    conv_final = Conv2D(OUTPUT_MASK_CHANNELS, (1, 1))(up_conv_224)
    conv_final = Activation('sigmoid')(conv_final)
    model = Model(inputs, conv_final, name="UNET_224")
    return model
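The snippet relies on a double_conv_layer helper that is not shown. A typical version of that helper, sketched here as an assumption (two 3x3 convolutions with batch normalization, ReLU, and optional spatial dropout), using the same keras.layers imports as the snippet above:

def double_conv_layer(x, filters, dropout_val=0.0):
    axis = 1 if K.image_dim_ordering() == 'th' else 3
    # Two 3x3 convolutions, each followed by batch normalization and ReLU.
    conv = Conv2D(filters, (3, 3), padding='same')(x)
    conv = BatchNormalization(axis=axis)(conv)
    conv = Activation('relu')(conv)
    conv = Conv2D(filters, (3, 3), padding='same')(conv)
    conv = BatchNormalization(axis=axis)(conv)
    conv = Activation('relu')(conv)
    if dropout_val > 0:
        conv = SpatialDropout2D(dropout_val)(conv)
    return conv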

Hierarchical Attention in TensorFlow 2.0

I am trying to create hierarchical attention in TensorFlow 2.0 using the AdditiveAttention Keras layer. The error I get is:
ValueError: Graph disconnected: cannot obtain value for tensor Tensor("question_input:0", shape=(None, None), dtype=float32) at layer "question_input". The following previous layers were accessed without issue: []
Can someone please tell me what I am doing wrong?
def get_text_model(self, embedding):
    print("Text Input")
    text_input = Input(shape=(None,), name="text_input")
    text_embedding = embedding(text_input)
    cnn_1d = Conv1D(128, 4, padding="same", activation="relu", strides=1)(text_embedding)
    output = cnn_1d
    model = Model(text_input, output)
    return model

def get_sentence_attention_model(self, sentence_input, encoded_question, sentence_model):
    encoded_sentence = sentence_model(sentence_input)
    sentence_attention = AdditiveAttention()([encoded_sentence, encoded_question])
    output = Concatenate()([sentence_attention, encoded_question])
    model = Model(sentence_input, output)
    return model

def get_section_model(self, encoded_question, sentence_model):
    section_input = Input(shape=(None, None), name="section_input")
    section_encoded = TimeDistributed(sentence_model)([self.question_input, section_input])
    cnn_1d = Conv1D(128, 4, padding="same", activation="relu", strides=1)(section_encoded)
    output = cnn_1d
    section_attention_output = AdditiveAttention()([output, encoded_question])
    model = Model(section_input, section_attention_output)
    return model

def get_document_model(self, encoded_question, section_model):
    document_input = Input(shape=(None, None, None), name="document_input")
    document_encoded = TimeDistributed(section_model)(document_input)
    cnn_1d = Conv1D(128, 4, padding="same", activation="relu", strides=1)(document_encoded)
    document_attention = AdditiveAttention()([cnn_1d, encoded_question])
    model = Model(document_input, document_attention)
    return model

def get_model(self):
    self.vocabulary_size = self.vectorizer.get_vocabulary_size()
    self.embedding_matrix = self.vectorizer.get_embedding_matrix()
    embedding = Embedding(self.vocabulary_size, self.embedding_size, mask_zero=True, trainable=True,
                          weights=None if self.embedding_matrix is None else [self.embedding_matrix])
    self.question_input = Input(shape=(None,), name="question_input")
    self.sentence_input = Input(shape=(None,), name="sentence_input")
    self.question_model = self.get_text_model(embedding)
    self.sentence_model = self.get_text_model(embedding)
    self.encoded_question = self.question_model(self.question_input)
    self.sentence_attention_model = self.get_sentence_attention_model(self.sentence_input, self.encoded_question, self.sentence_model)
    self.section_model = self.get_section_model(self.encoded_question, self.sentence_attention_model)
    self.document_model = self.get_document_model(self.encoded_question, self.section_model)
    optimizer = Adadelta()
    loss_metrics = "binary_crossentropy"
    self.document_model.compile(loss=loss_metrics, optimizer=optimizer, metrics=[loss_metrics])
    self.document_model.summary()
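For context on "Graph disconnected" errors in general: Keras raises this when a Model's outputs depend on a tensor that is not reachable from the Model's declared inputs; here document_attention also depends on encoded_question (built from question_input), yet the final Model only declares document_input. A minimal, self-contained sketch of the pattern and the usual remedy (declaring every required Input on the Model); the shapes and layer sizes are placeholders, not taken from the question:

import tensorflow as tf
from tensorflow.keras import layers, Model

question_input = layers.Input(shape=(None,), name="question_input")
document_input = layers.Input(shape=(None, 8), name="document_input")

encoded_question = layers.Dense(8)(layers.Embedding(1000, 8)(question_input))
attended = layers.AdditiveAttention()([document_input, encoded_question])

# Model(document_input, attended) would raise "Graph disconnected", because `attended`
# also depends on `question_input`; declaring both inputs resolves it.
model = Model(inputs=[document_input, question_input], outputs=attended)
model.summary()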