Unhashable Type TypeError: Tensors are unhashable. Instead, use tensor.ref() as the key - tensorflow

I am trying to implement a custom variational autoencoder. Following is the code to reproduce.
epsilon_std = 1.0
vx = tf.keras.layers.Input(batch_shape=(None, max_length_output), name='vae_enc_in')
vx_emb = tf.keras.layers.Embedding(
vocab_tar_size,
embedding_dim,
input_length=max_length_output,
name='vae_enc_emb'
)(vx)
vxbi = tf.keras.layers.Bidirectional(
tf.keras.layers.LSTM(units, return_sequences=False, recurrent_dropout=0.2, name='vae_enc_lstm'), merge_mode='concat'
)(vx_emb)
vx_drop = tf.keras.layers.Dropout(0.2, name='vae_enc_drop')(vxbi)
vx_dense = tf.keras.layers.Dense(units, activation='linear', name='vae_enc_dense')(vx_drop)
vx_elu = tf.keras.layers.ELU(name='vae_enc_elu')(vx_dense)
vx_drop1 = tf.keras.layers.Dropout(0.2, name='vae_enc_drop2')(vx_elu)
z_mean = tf.keras.layers.Dense(20, name='vae_enc_dense2')(vx_drop1)
z_log_var = tf.keras.layers.Dense(20, name='vae_enc_dense3')(vx_drop1)
def sampling(args):
z_mean, z_log_var = args
epsilon = tf.random.normal(shape=(BATCH_SIZE, 20), mean=0.,
stddev=epsilon_std)
return z_mean + tf.math.exp(z_log_var / 2) * epsilon
z = tf.keras.layers.Lambda(sampling, output_shape=(20,), name='vae_lambda')([z_mean, z_log_var])
repeated_context = tf.keras.layers.RepeatVector(max_length_output, name='vae_repeat')
decoder_h = tf.keras.layers.LSTM(units, return_sequences=True, recurrent_dropout=0.2, name='vae_dec_lstm')
decoder_mean = tf.keras.layers.TimeDistributed(
tf.keras.layers.Dense(vocab_tar_size, activation='linear', name='vae_dec_lstm'),
name='vae_dec_time_dist'
)
h_decoded = decoder_h(repeated_context(z))
x_decoded_mean = decoder_mean(h_decoded)
def zero_loss(y_true, y_pred):
print("ZERO LOSS")
return tf.zeros_like(y_pred)
And then creating a custom vae layer
class VAELayer(tf.keras.layers.Layer):
def __init__(self, batch_size, max_len, **kwargs):
self.is_placeholder = True
super(VAELayer, self).__init__(**kwargs)
self.target_weights = tf.constant(np.ones((batch_size, max_len)), tf.float32)
def vae_loss(self, x, x_decoded_mean):
#xent_loss = K.sum(metrics.categorical_crossentropy(x, x_decoded_mean), axis=-1)
labels = tf.cast(x, tf.int32)
xent_loss = tf.math.reduce_sum(
tfa.seq2seq.sequence_loss(
x_decoded_mean,
labels,
weights=self.target_weights,
average_across_timesteps=False,
average_across_batch=False
),
axis=-1
)
#softmax_loss_function=softmax_loss_f), axis=-1)#, for sampled softmax
kl_loss = - 0.5 * tf.math.reduce_sum(1 + z_log_var - tf.math.square(z_mean) - tf.math.exp(z_log_var), axis=-1)
return tf.math.reduce_mean(xent_loss + kl_loss)
def call(self, inputs):
x = inputs[0]
x_decoded_mean = inputs[1]
print(x.shape, x_decoded_mean.shape)
loss = self.vae_loss(x, x_decoded_mean)
print("Adding loss")
self.add_loss(loss, inputs=inputs)
print("Returning ones like")
return tf.ones_like(x)
I compiled it successfully and also produced a test output by calling the model. But when i try to train, it, It produces the error
TypeError: Tensors are unhashable. (KerasTensor(type_spec=TensorSpec(shape=(), dtype=tf.float32, name=None), name='tf.math.reduce_sum_25/Sum:0', description="created by layer 'tf.math.reduce_sum_25'"))Instead, use tensor.ref() as the key.
Following is the code for compiling and fitting the model
loss_layer = VAELayer(BATCH_SIZE, max_length_output)([vx, x_decoded_mean])
vae = tf.keras.Model(vx, [loss_layer], name='VariationalAutoEncoderLayer')
opt = tf.keras.optimizers.Adam(lr=0.01) #SGD(lr=1e-2, decay=1e-6, momentum=0.9, nesterov=True)
vae.compile(optimizer=opt, loss=[zero_loss])
def vae_sentence_generator():
for ip, tg in train_dataset:
yield tg.numpy()
vae.fit(vae_sentence_generator(steps_per_epoch=steps_per_epoch, epochs=10))

Related

Multi-input TF model using TFRecord datasets

I’m trying to create a multi-input single output model in TensorFlow.
I load the data from TFRecs using the get_batched_data fn.
def get_batched_dataset(filenames, batch_size):
dataset = (
tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTO)
.map(parse_tfrecord_fn, num_parallel_calls=AUTO)
.map(prepare_sample, num_parallel_calls=AUTO)
.batch(batch_size)
)
return dataset
In the above fn, I do some preprocessing with the loaded data from TFRecs using the prepare_sample fn.
def prepare_sample(features):
image = features['image']
w = tf.shape(image)[0]
h = tf.shape(image)[1]
# some type of preprocessing/data augmentation/transforms
x = {'l_eye': l_eye, 'r_eye':r_eye, 'kps':kps} #l_eye & r_eye are images, kps is numerical data
y = out
return x, y
Below is a very small version of how I’m trying to code my model architecture, just to get an idea.
class cnn_model(layers.Layer):
def __init__(self, name='cnn-model'):
super(cnn_model, self).__init__()
self.conv1 = layers.Conv2D(32, kernel_size=7, strides=2, padding='valid')
self.conv2 = layers.Conv2D(64, kernel_size=5, strides=2, padding='valid')
self.conv3 = layers.Conv2D(128, kernel_size=3, strides=1, padding='valid')
self.bn1 = layers.BatchNormalization(axis = -1, momentum=0.9)
self.bn2 = layers.BatchNormalization(axis = -1, momentum=0.9)
self.bn3 = layers.BatchNormalization(axis = -1, momentum=0.9)
self.leakyrelu = layers.LeakyReLU(alpha=0.01)
self.avgpool = layers.AveragePooling2D(pool_size=2)
self.dropout = layers.Dropout(rate=0.02)
def call(self, input_image):
x = self.conv1(input_image)
x = self.bn1(x)
x = self.leakyrelu(x)
x = self.avgpool(x)
x = self.dropout(x)
x = self.conv2(x)
x = self.bn2(x)
x = self.leakyrelu(x)
x = self.avgpool(x)
x = self.dropout(x)
x = self.conv3(x)
x = self.bn3(x)
x = self.leakyrelu(x)
x = self.avgpool(x)
x = self.dropout(x)
return x
class num_model(layers.Layer):
def __init__(self, name='num-model'):
super(num_model, self).__init__()
self.dense1 = layers.Dense(128)
self.dense2 = layers.Dense(16)
def call(self, input_keypoints):
x = self.dense1(input_keypoints)
x = self.dense2(x)
return x
class main_model(Model):
def __init__(self, name='main-model'):
super(main_model, self).__init__()
self.cnn_model = cnn_model()
self.num_model = num_model()
self.dense1 = layers.Dense(8)
self.dense2 = layers.Dense(2)
def call(self, input_l_r_kps):
leftEye, rightEye, kps = input_l_r_kps['l_eye'], input_l_r_kps['r_eye'], input_l_r_lms['kps']
l_eye_feat = tf.reshape(self.cnn_model(leftEye), (1, 3*128*128))
r_eye_feat = tf.reshape(self.cnn_model(rightEye), (1, 3*128*128))
kp_feat = self.num_model(kps)
combined_feat = tf.concat((l_eye_feat, r_eye_feat, lm_feat),1)
x = self.dense1(combined_feat)
x = self.dense2(x)
return x
Now, the dataset returned by the get_batched_dataset fn is what I’ll be feeding into the Keras model.fit method.
train_dataset = get_batched_dataset('train.tfrec', batch_size)
valid_dataset = get_batched_dataset('valid.tfrec', batch_size)
model.fit(
x=train_dataset,
batch_size=batch_size,
epochs=1,
validation_data=valid_dataset,
use_multiprocessing=False
)
Can you please guide me where I’m going wrong? Is it in the prepare_sample fn by returning x as a dict, or somewhere in the model code? I’m really new to TF and confused.
Any help appreciated!

When I use transformer model to train a translator, why it cannot convert y to a tensor

def encoder():
input_layer = Input(batch_shape=(None, 13, 128))
h= layer(input_layer)
h= Masking(mask_value=0.0)(h)
h, hidden_layer, cell_layer = LSTM(512, return_state=True)(h)
model = Model(inputs = input_layer, outputs = [hidden_layer, cell_layer])
return model
model=encoder()
model.summary()
class Decoder(Model):
def __init__(self):
super(Decoder, self).__init__()
self.embedding_layer = Embedding(input_dim=max_tokens+1, output_dim=128, mask_zero=True)
self.lstm_layer = LSTM(512,
return_state=True, return_sequences=True)
self.dense_layer = Dense(units=max_tokens+1)
def call(self,inputer,hidden_layer=None,cell_layer=None):
x=self.embedding_layer(inputer)
if hidden_layer!=None and cell_layer!=None:
x, h, c = self.lstm_layer(x, initial_state=[hidden_layer, cell_layer])
else:
x, h, c = self.lstm_layer(x)
x=self.dense_layer(x)
return x,h,c
decoder=Decoder()
for eng,germ in train.take(1):
y,hidden,cell = decoder(germ)
#tf.function
def loss_fn(en_input, germ_input, germ_output, loss):
with tf.GradientTape() as tape:
enc_hidden_s, enc_cell_s = model(en_input)
dec_output, dec_hidden_s, dec_cell_s = decoder(germ_input, enc_hidden_s,enc_cell_s)
loss_value = loss(germ_output, dec_output)
return loss_value, tape.gradient(loss_value, variables)
def fit_german_shape(german):
input_data = german[:,:-1]
output_data = german[:,1:]
return input_data,output_data
def training(train_data, test_data,optimizer, loss,epochs=5):
batch_num=0
batch_num2=0
epoch_loss=0
epoch_loss2=0
for english,germany in train:
germany_in,germany_out=fit_german_shape(germany)
loss2, grad= loss_fn(english, germany_in, germany_out, loss)
optimizer.apply_gradients(zip(grad,model.trainable_variables + decoder.trainable_variables))
epoch_loss=epoch_loss+loss2
batch_num=batch_num+1
avg_loss=epoch_loss/batch_num
avg_loss3=String(avg_loss1)
print("In this train epoch, the loss is"+ave_loss3)
for english2,germany2 in test:
germany_in2,germany_out2=fit_german_shape(germany2)
hidden_state,cell_state=model(en)
pred,temp1,temp2=decoder(germany_in2,hidden_state,cell_state)
loss, temp3 = loss_fn(english2, germany_in2, germany_out2)
epoch_loss2=loss+epoch_loss2
batch_num=batch_num+1
avg_loss2=epoch_loss2/batch_num2
avg_loss4=String(avg_loss2)
print("In this test epoch, the loss is"+ave_loss4)
return avg_loss,avg_loss2
When I use this model to translate German to English, it report the error that "Tried to convert 'y' to a tensor and failed. Error: None values not supported." Error may occur in the decoder to assign value to x,h,c, but I dont know why cannot convert y to a tensor.

How to load the model from my architecture?

I have done this code and I need to load the model to work later on it but when I try to use load_model() the error is No model found in config file. And when I try to load the weights the error is Unable to load weights saved in HDF5 format into a subclassed Model which has not created its variables yet. Call the Model first, then load the weights.
This is my code
class Sampling(layers.Layer):
"""Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""
def call(self, inputs):
z_mean, z_log_var = inputs
batch = tf.shape(z_mean)[0]
dim = tf.shape(z_mean)[1]
epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
return z_mean + tf.exp(0.5 * z_log_var) * epsilon
I defined an encoder and a decorder that I will use later in
class VAE(keras.Model):
def __init__(self, encoder, decoder, **kwargs):
super(VAE, self).__init__(**kwargs)
self.encoder = encoder
self.decoder = decoder
def train_step(self, data):
if isinstance(data, tuple):
data = data[0]
with tf.GradientTape() as tape:
z_mean, z_log_var, z = encoder(data)
reconstruction = decoder(z)
reconstruction_loss = tf.reduce_mean(
keras.losses.binary_crossentropy(data, reconstruction)
)
reconstruction_loss *= 64 * 64 * 3
kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
kl_loss = tf.reduce_mean(kl_loss)
kl_loss *= -0.5
total_loss = reconstruction_loss + kl_loss
grads = tape.gradient(total_loss, self.trainable_weights)
self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
return {
"loss": total_loss,
"reconstruction_loss": reconstruction_loss,
"kl_loss": kl_loss,
}
def test_step(self, data):
if isinstance(data, tuple):
data = data[0]
with tf.GradientTape() as tape:
z_mean, z_log_var, z = encoder(data)
reconstruction = decoder(z)
reconstruction_loss = tf.reduce_mean(
keras.losses.binary_crossentropy(data, reconstruction)
)
reconstruction_loss *= 64 * 64 * 3
kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
kl_loss = tf.reduce_mean(kl_loss)
kl_loss *= -0.5
total_loss = reconstruction_loss + kl_loss
grads = tape.gradient(total_loss, self.trainable_weights)
self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
return {
"loss": total_loss,
"reconstruction_loss": reconstruction_loss,
"kl_loss": kl_loss,
}
Finally this is how I use it and create the model
model_name = 'car_racing_VAE.h5'
vae = VAE(encoder, decoder)
vae.compile(optimizer=keras.optimizers.Adam(0.001))
checkpointer = keras.callbacks.ModelCheckpoint(filepath=model_name, monitor='val_loss', verbose=1, save_best_only=True, mode='min', save_freq='epoch')
history = vae.fit(train, train,
epochs=150,
batch_size = 128,
shuffle=True,
validation_data=(val, val), validation_batch_size=128,
callbacks=[checkpointer])
So, how can I load the model and use it later?
model = load_model(model_name)
vae.load_weights(model_name)
None of them are working
As per the error it looks like issue with the loading the saved model from the path.
As suggested by Gerry P in comment check the location of saved model and load it from the same location.
You can use the below code to avoid the location issue.
#saving model
model = ... # Get model
model.save('path/to/location')
#loading the model back
from tensorflow import keras
model = keras.models.load_model('path/to/location')

Call method not implemented runtime error, when creating VAE in Keras. Model Subclassing

I'm running the VAE Keras code sample from here: https://keras.io/examples/generative/vae/ and it works OK. But when I modify it to use a validation_split = 0.2 in vae.fit() method I'm getting a runtime error: NotImplementedError: When subclassing the Model class, you should implement a call method.
The code uses Model subclassing. My question is: Why it works well without using validation_split parameter on fit()? and what I have to do to get the VAE working with a validation_split?
I'm already tried to remove the model names as suggested here: During creating VAE model throws exception "you should implement a `call` method." but the problem persists.
decoder =keras.Model(latent_inputs, decoder_outputs)
encoder = keras.Model(encoder_inputs, [z_mean, z_log_var, z])
Here is the Model subclass:
class VAE(keras.Model):
def __init__(self, encoder, decoder, **kwargs):
super(VAE, self).__init__(**kwargs)
self.encoder = encoder
self.decoder = decoder
def train_step(self, data):
if isinstance(data, tuple):
data = data[0]
with tf.GradientTape() as tape:
z_mean, z_log_var, z = encoder(data)
reconstruction = decoder(z)
reconstruction_loss = tf.reduce_mean(
keras.losses.binary_crossentropy(data, reconstruction)
)
reconstruction_loss *= 28 * 28
kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
kl_loss = tf.reduce_mean(kl_loss)
kl_loss *= -0.5
total_loss = reconstruction_loss + kl_loss
grads = tape.gradient(total_loss, self.trainable_weights)
self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
return {
"loss": total_loss,
"reconstruction_loss": reconstruction_loss,
"kl_loss": kl_loss,
}
Sampling layer:
class Sampling(layers.Layer):
"""Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""
def call(self, inputs):
z_mean, z_log_var = inputs
batch = tf.shape(z_mean)[0]
dim = tf.shape(z_mean)[1]
epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
return z_mean + tf.exp(0.5 * z_log_var) * epsilon
Encoder definition:
latent_dim = 2
encoder_inputs = keras.Input(shape=(28, 28, 1))
x = layers.Conv2D(32, 3, activation="relu", strides=2, padding="same")(encoder_inputs)
x = layers.Conv2D(64, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Flatten()(x)
x = layers.Dense(16, activation="relu")(x)
z_mean = layers.Dense(latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
z = Sampling()([z_mean, z_log_var])
encoder = keras.Model(encoder_inputs, [z_mean, z_log_var, z], name="encoder")
encoder.summary()
Decoder definition:
latent_inputs = keras.Input(shape=(latent_dim,))
x = layers.Dense(7 * 7 * 64, activation="relu")(latent_inputs)
x = layers.Reshape((7, 7, 64))(x)
x = layers.Conv2DTranspose(64, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Conv2DTranspose(32, 3, activation="relu", strides=2, padding="same")(x)
decoder_outputs = layers.Conv2DTranspose(1, 3, activation="sigmoid", padding="same")(x)
decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")
decoder.summary()
Train VAE
(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
mnist_digits = np.concatenate([x_train, x_test], axis=0)
mnist_digits = np.expand_dims(mnist_digits, -1).astype("float32") / 255
vae = VAE(encoder, decoder)
vae.compile(optimizer=keras.optimizers.Adam())
vae.fit(mnist_digits, epochs=30, batch_size=128, validation_split=0.2)
Thanks in advance!
Solved: I found the answer in this issue in Github: https://github.com/keras-team/keras-io/issues/38
The solution is to add a test_step for use in the validation phase, in the Model subclass.
def test_step(self, data):
if isinstance(data, tuple):
data = data[0]
z_mean, z_log_var, z = encoder(data)
reconstruction = decoder(z)
reconstruction_loss = tf.reduce_mean(
keras.losses.binary_crossentropy(data, reconstruction)
)
reconstruction_loss *= 28 * 28
kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
kl_loss = tf.reduce_mean(kl_loss)
kl_loss *= -0.5
total_loss = reconstruction_loss + kl_loss
return {
"loss": total_loss,
"reconstruction_loss": reconstruction_loss,
"kl_loss": kl_loss,
}

The model cannot be compiled because it has no loss to optimize

I write a vae model which posterior is GMM ,and use self.add_loss to define vae loss,but an error occur when i fit my model:
ValueError: The model cannot be compiled because it has no loss to optimize.
here is my code:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.datasets import mnist
import matplotlib.pyplot as plt
from tensorflow.keras import layers
import tensorflow_probability as tfp
import numpy as np
tfd = tfp.distributions
tf.test.is_gpu_available()
# data
(x_train, x_labels), (x_val, x_val_labels) = mnist.load_data()
x_train = x_train.reshape(60000, 784).astype("float32") / 255.
x_val = x_val.reshape(10000, 784).astype("float32") / 255.
x_train[x_train >= 0.5] = 1.
x_train[x_train < 0.5] = 0.
x_val[x_val >= 0.5] = 1.
x_val[x_val < 0.5] = 0.
# from softmax to one_hot
def props_to_onehot(props):
if isinstance(props, list):
props = np.array(props)
a = np.argmax(props, axis=1)
b = np.zeros((len(a), props.shape[1]))
b[np.arange(len(a)), a] = 1
return b
# reparameter
class Sampling(layers.Layer):
"""Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""
def call(self, inputs):
z_mean, z_log_var = inputs
batch = tf.shape(z_mean)[0]
dim = tf.shape(z_mean)[1]
epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
return z_mean + tf.exp(0.5 * z_log_var) * epsilon
class Encoder(layers.Layer):
def __init__(self, latent_dim, base_depth, components, name='encoder', **kwargs):
"""
latent_size: the dimensionality of latent variable z(also the dim of u and Σ)
base_depth: base units of Dense
components: the numbers of gussian distribution.In this case ,we set components = 10
"""
super(Encoder, self).__init__(name=name, **kwargs)
self.latent_size = latent_dim
self.base_depth = base_depth
self.components = components
# shared structured of encoder
self.dense1 = Dense(8 * self.base_depth, activation='relu', name='1')
self.dropout1 = tf.keras.layers.Dropout(0.2)
self.dense2 = Dense(4 * self.base_depth, activation='relu', name='2')
self.dropout2 = tf.keras.layers.Dropout(0.2)
self.dense3 = Dense(4 * self.base_depth, activation='relu', name='3')
self.dense4 = Dense(2 * self.base_depth, activation='relu', name='4')
self.dense5 = Dense(2 * self.base_depth, activation='relu', name='5')
# the output parameters of encoder including {pi,u,Σ}
self.parameters = Dense(self.components + self.components * 2 * self.latent_size, name='6')
self.sampling = Sampling()
def call(self, inputs):
# shared structure output
x = self.dense1(inputs)
x = self.dropout1(x)
x = self.dense2(x)
x = self.dropout2(x)
x = self.dense3(x)
x = self.dense4(x)
x = self.dense5(x)
# meaningful parameters
parameters = self.parameters(x)
pi, _ = tf.split(parameters, [self.components, 10 * 2 * self.latent_size], axis=-1)
pi = tf.nn.softmax(pi)
pi = props_to_onehot(pi)
batch_size_int = tf.shape(pi)[0].numpy()
batch_list = []
for i in range(batch_size_int):
index = np.argmax(pi[0])
batch_list.append(parameters[0][self.components + index * 2 * self.latent_size + 1:self.components + (
index + 1) * 2 * self.latent_size + 1])
batch_list = np.array(batch_list) # (batch_size,2*latent_size)
# (batch_size,latent_size);(batch_size,latent_size)
z_mean, z_log_var = tf.split(batch_list, [self.latent_size, self.latent_size], axis=-1)
z = self.sampling((z_mean, z_log_var))
kl_loss = -0.5 * tf.reduce_mean(z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + 1)
self.add_loss(kl_loss)
return z_mean, z_log_var, z
class Decoder(layers.Layer):
def __init__(self, base_depth, name="decoder", **kwargs):
super(Decoder, self).__init__(name=name, **kwargs)
self.base_depth = base_depth
self.dense1 = Dense(self.base_depth)
self.dense2 = Dense(2 * self.base_depth, activation='relu')
self.dense3 = Dense(4 * self.base_depth, activation='relu')
self.dropout1 = tf.keras.layers.Dropout(0.2)
self.dense4 = Dense(4 * self.base_depth, activation='relu')
self.dense5 = Dense(8 * self.base_depth, activation='relu')
self.dropout2 = tf.keras.layers.Dropout(0.2)
# no activation
self.dense_out = Dense(784)
def call(self, inputs):
x = self.dense1(inputs)
x = self.dense2(x)
x = self.dense3(x)
x = self.dropout1(x)
x = self.dense4(x)
x = self.dense5(x)
x = self.dropout2(x)
x = self.dense_out(x)
# shape=(B,784)
return x
class GMM_VAE_Posterior(tf.keras.Model):
def __init__(self, latent_dim, base_depth, components, name='auto_encoder', **kwargs):
super(GMM_VAE_Posterior, self).__init__(name=name, **kwargs)
self.latent_dim = latent_dim
self.base_depth = base_depth
self.components = components
self.encoder = Encoder(self.latent_dim, self.base_depth, self.components)
self.decoder = Decoder(self.base_depth)
def call(self, inputs):
z_mean, z_log_var, z = self.encoder(inputs)
out = self.decoder(z) # (batch_size,784)
reconstructions_error = tf.nn.sigmoid_cross_entropy_with_logits(labels=inputs, logits=out)
reconstructions_error = tf.reduce_sum(reconstructions_error, axis=-1)
reconstructions_error = tf.reduce_mean(reconstructions_error)
self.add_loss(reconstructions_error)
# shape:(batch_size,784)
return out
vae_gmm = GMM_VAE_Posterior(16, 64, 10)
vae_gmm.compile(optimizer=tf.keras.optimizers.Adam())
vae_gmm.fit(x_train, x_train, epochs=5, batch_size=64) # error
In my view,i think the computation graph of my model is not complete,so model can not BP.But it is just my gusses.
On model compiling, you must fill in the loss parameter. So, when you added the loss in another way, simply set it to None:
vae_gmm.compile(optimizer=tf.keras.optimizers.Adam(), loss = None)