The model cannot be compiled because it has no loss to optimize - tensorflow

I wrote a VAE model whose posterior is a GMM, and I use self.add_loss to define the VAE loss, but an error occurs when I fit my model:
ValueError: The model cannot be compiled because it has no loss to optimize.
Here is my code:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.datasets import mnist
import matplotlib.pyplot as plt
from tensorflow.keras import layers
import tensorflow_probability as tfp
import numpy as np

tfd = tfp.distributions
tf.test.is_gpu_available()

# data
(x_train, x_labels), (x_val, x_val_labels) = mnist.load_data()
x_train = x_train.reshape(60000, 784).astype("float32") / 255.
x_val = x_val.reshape(10000, 784).astype("float32") / 255.
x_train[x_train >= 0.5] = 1.
x_train[x_train < 0.5] = 0.
x_val[x_val >= 0.5] = 1.
x_val[x_val < 0.5] = 0.

# from softmax to one-hot
def props_to_onehot(props):
    if isinstance(props, list):
        props = np.array(props)
    a = np.argmax(props, axis=1)
    b = np.zeros((len(a), props.shape[1]))
    b[np.arange(len(a)), a] = 1
    return b
# reparameterization
class Sampling(layers.Layer):
    """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""

    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon
class Encoder(layers.Layer):
    def __init__(self, latent_dim, base_depth, components, name='encoder', **kwargs):
        """
        latent_dim: the dimensionality of the latent variable z (also the dim of u and Σ)
        base_depth: base units of Dense
        components: the number of Gaussian components. In this case we set components = 10
        """
        super(Encoder, self).__init__(name=name, **kwargs)
        self.latent_size = latent_dim
        self.base_depth = base_depth
        self.components = components
        # shared structure of the encoder
        self.dense1 = Dense(8 * self.base_depth, activation='relu', name='1')
        self.dropout1 = tf.keras.layers.Dropout(0.2)
        self.dense2 = Dense(4 * self.base_depth, activation='relu', name='2')
        self.dropout2 = tf.keras.layers.Dropout(0.2)
        self.dense3 = Dense(4 * self.base_depth, activation='relu', name='3')
        self.dense4 = Dense(2 * self.base_depth, activation='relu', name='4')
        self.dense5 = Dense(2 * self.base_depth, activation='relu', name='5')
        # the output parameters of the encoder, including {pi, u, Σ}
        self.parameters = Dense(self.components + self.components * 2 * self.latent_size, name='6')
        self.sampling = Sampling()

    def call(self, inputs):
        # shared structure output
        x = self.dense1(inputs)
        x = self.dropout1(x)
        x = self.dense2(x)
        x = self.dropout2(x)
        x = self.dense3(x)
        x = self.dense4(x)
        x = self.dense5(x)
        # meaningful parameters
        parameters = self.parameters(x)
        pi, _ = tf.split(parameters, [self.components, 10 * 2 * self.latent_size], axis=-1)
        pi = tf.nn.softmax(pi)
        pi = props_to_onehot(pi)
        batch_size_int = tf.shape(pi)[0].numpy()
        batch_list = []
        for i in range(batch_size_int):
            index = np.argmax(pi[0])
            batch_list.append(parameters[0][self.components + index * 2 * self.latent_size + 1:self.components + (
                    index + 1) * 2 * self.latent_size + 1])
        batch_list = np.array(batch_list)  # (batch_size, 2*latent_size)
        # (batch_size, latent_size); (batch_size, latent_size)
        z_mean, z_log_var = tf.split(batch_list, [self.latent_size, self.latent_size], axis=-1)
        z = self.sampling((z_mean, z_log_var))
        kl_loss = -0.5 * tf.reduce_mean(z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + 1)
        self.add_loss(kl_loss)
        return z_mean, z_log_var, z
class Decoder(layers.Layer):
    def __init__(self, base_depth, name="decoder", **kwargs):
        super(Decoder, self).__init__(name=name, **kwargs)
        self.base_depth = base_depth
        self.dense1 = Dense(self.base_depth)
        self.dense2 = Dense(2 * self.base_depth, activation='relu')
        self.dense3 = Dense(4 * self.base_depth, activation='relu')
        self.dropout1 = tf.keras.layers.Dropout(0.2)
        self.dense4 = Dense(4 * self.base_depth, activation='relu')
        self.dense5 = Dense(8 * self.base_depth, activation='relu')
        self.dropout2 = tf.keras.layers.Dropout(0.2)
        # no activation
        self.dense_out = Dense(784)

    def call(self, inputs):
        x = self.dense1(inputs)
        x = self.dense2(x)
        x = self.dense3(x)
        x = self.dropout1(x)
        x = self.dense4(x)
        x = self.dense5(x)
        x = self.dropout2(x)
        x = self.dense_out(x)
        # shape=(B, 784)
        return x
class GMM_VAE_Posterior(tf.keras.Model):
    def __init__(self, latent_dim, base_depth, components, name='auto_encoder', **kwargs):
        super(GMM_VAE_Posterior, self).__init__(name=name, **kwargs)
        self.latent_dim = latent_dim
        self.base_depth = base_depth
        self.components = components
        self.encoder = Encoder(self.latent_dim, self.base_depth, self.components)
        self.decoder = Decoder(self.base_depth)

    def call(self, inputs):
        z_mean, z_log_var, z = self.encoder(inputs)
        out = self.decoder(z)  # (batch_size, 784)
        reconstructions_error = tf.nn.sigmoid_cross_entropy_with_logits(labels=inputs, logits=out)
        reconstructions_error = tf.reduce_sum(reconstructions_error, axis=-1)
        reconstructions_error = tf.reduce_mean(reconstructions_error)
        self.add_loss(reconstructions_error)
        # shape: (batch_size, 784)
        return out
vae_gmm = GMM_VAE_Posterior(16, 64, 10)
vae_gmm.compile(optimizer=tf.keras.optimizers.Adam())
vae_gmm.fit(x_train, x_train, epochs=5, batch_size=64) # error
In my view, the computation graph of my model is not complete, so the model cannot backpropagate. But that is just my guess.

When compiling the model, Keras expects the loss parameter to be filled in. Since you added the loss another way (via add_loss), simply set it to None:
vae_gmm.compile(optimizer=tf.keras.optimizers.Adam(), loss = None)
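For completeness, a minimal sketch of the compile/fit sequence with the loss argument set explicitly (run_eagerly=True is my assumption here, added only because Encoder.call above uses .numpy() and NumPy ops, which work only when the model executes eagerly):
vae_gmm = GMM_VAE_Posterior(16, 64, 10)

# loss=None: rely solely on the losses registered via add_loss()
vae_gmm.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=None,
    run_eagerly=True,  # assumption: eager execution for the .numpy() calls in Encoder.call
)
vae_gmm.fit(x_train, x_train, epochs=5, batch_size=64)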

Related

tensorflow implementation is much slower than pytorch (with low GPU utilization) and occupies more memory

I am trying to convert a PyTorch implementation to TensorFlow, but I found that the program occupies more memory, has lower GPU utilization, and is much slower than the PyTorch version. I am wondering whether this is a general case, or whether I am wrong with the TF model structure?
I put the code on, it can be directly run.
https://colab.research.google.com/drive/1oI6GVnt3sAULvbMMAGTY4B6LsHguJd9U#scrollTo=DNH5pPynm-jm
The PyTorch version uses half the memory with twice the GPU utilization compared with the TF version.
Code:
import math
import os
import tensorflow as tf
import json
import numpy as np
import time

def calc_diffusion_step_embedding(diffusion_steps, diffusion_step_embed_dim_in):
    assert diffusion_step_embed_dim_in % 2 == 0
    half_dim = diffusion_step_embed_dim_in // 2
    _embed = tf.math.log(tf.convert_to_tensor(10000.0)) / (half_dim - 1)
    _embed = tf.math.exp(tf.cast(tf.experimental.numpy.arange(start=0, stop=half_dim), dtype=tf.float32) * -_embed)
    _embed = tf.cast(diffusion_steps, dtype=tf.float32) * _embed
    diffusion_step_embed = tf.concat((tf.math.sin(_embed),
                                      tf.math.cos(_embed)), 1)
    assert diffusion_step_embed.shape[0] == diffusion_steps.shape[0]
    assert diffusion_step_embed.shape[1] == diffusion_step_embed_dim_in
    return diffusion_step_embed
class Residual_block(tf.keras.layers.Layer):
    def __init__(self, res_channels, skip_channels,
                 diffusion_step_embed_dim_out, in_channels,
                 s4_lmax,
                 s4_d_state,
                 s4_dropout,
                 s4_bidirectional,
                 s4_layernorm):
        super(Residual_block, self).__init__()
        self.res_channels = res_channels
        self.fc_t = tf.keras.layers.Dense(self.res_channels)
        self.conv_layer = tf.keras.layers.Conv1D(filters=2 * self.res_channels, kernel_size=3, padding='SAME',
                                                 use_bias=False, kernel_initializer='he_normal', data_format='channels_first')
        self.cond_conv = tf.keras.layers.Conv1D(filters=2 * self.res_channels, kernel_size=1, padding='SAME',
                                                use_bias=False, kernel_initializer='he_normal', data_format='channels_first')
        self.res_conv1 = tf.keras.layers.Conv1D(filters=res_channels, kernel_size=1, padding='SAME',
                                                use_bias=False, kernel_initializer='he_normal', data_format='channels_first')
        self.res_conv2 = tf.keras.layers.Conv1D(filters=skip_channels, kernel_size=1, padding='SAME',
                                                use_bias=False, kernel_initializer='he_normal', data_format='channels_first')

    def call(self, input_data):
        x, cond, diffusion_step_embed = input_data
        h = x
        B, C, L = h.shape
        part_t = self.fc_t(diffusion_step_embed)
        part_t = tf.reshape(part_t, [B, self.res_channels, 1])
        h = h + part_t
        h = self.conv_layer(h)
        cond = self.cond_conv(cond)
        h += cond
        out = tf.math.tanh(h[:, :self.res_channels, :]) * tf.math.sigmoid(h[:, self.res_channels:, :])
        res = self.res_conv1(out)
        skip = self.res_conv2(out)
        return (x + res) * tf.math.sqrt(0.5), skip  # normalize for training stability
class Residual_group(tf.keras.Model):
    def __init__(self, res_channels, skip_channels, num_res_layers,
                 diffusion_step_embed_dim_in,
                 diffusion_step_embed_dim_mid,
                 diffusion_step_embed_dim_out,
                 in_channels,
                 s4_lmax,
                 s4_d_state,
                 s4_dropout,
                 s4_bidirectional,
                 s4_layernorm):
        super(Residual_group, self).__init__()
        self.num_res_layers = num_res_layers
        self.diffusion_step_embed_dim_in = diffusion_step_embed_dim_in
        self.fc_t1 = tf.keras.layers.Dense(diffusion_step_embed_dim_mid)
        self.fc_t2 = tf.keras.layers.Dense(diffusion_step_embed_dim_out)
        self.residual_blocks = []
        for n in range(self.num_res_layers):
            self.residual_blocks.append(Residual_block(res_channels, skip_channels,
                                                       diffusion_step_embed_dim_out=diffusion_step_embed_dim_out,
                                                       in_channels=in_channels,
                                                       s4_lmax=s4_lmax,
                                                       s4_d_state=s4_d_state,
                                                       s4_dropout=s4_dropout,
                                                       s4_bidirectional=s4_bidirectional,
                                                       s4_layernorm=s4_layernorm))

    def call(self, input_data):
        h, conditional, diffusion_steps = input_data
        diffusion_step_embed = calc_diffusion_step_embedding(diffusion_steps, self.diffusion_step_embed_dim_in)
        diffusion_step_embed = tf.keras.activations.swish(self.fc_t1(diffusion_step_embed))
        diffusion_step_embed = tf.keras.activations.swish(self.fc_t2(diffusion_step_embed))
        #out = self.residual_blocks((h, tf.zeros((8,256,248)), conditional, diffusion_step_embed))
        #skip = out[1]
        skip = tf.zeros((8, 256, 248))
        for n in range(self.num_res_layers):
            h, skip_n = self.residual_blocks[n]((h, conditional, diffusion_step_embed))
            skip += skip_n
        return skip * tf.math.sqrt(1.0 / self.num_res_layers)
class SSSDS4Imputer(tf.keras.Model):
    def __init__(self, in_channels, res_channels, skip_channels, out_channels,
                 num_res_layers,
                 diffusion_step_embed_dim_in,
                 diffusion_step_embed_dim_mid,
                 diffusion_step_embed_dim_out,
                 s4_lmax,
                 s4_d_state,
                 s4_dropout,
                 s4_bidirectional,
                 s4_layernorm):
        super(SSSDS4Imputer, self).__init__()
        # convert the dimension of input from (B,in_channels,L) to (B,res_channels,L)
        self.init_conv = tf.keras.layers.Conv1D(filters=res_channels, kernel_size=1, padding='SAME',
                                                use_bias=False, kernel_initializer='he_normal', data_format='channels_first')
        self.res_channels = res_channels
        self.skip_channels = skip_channels
        self.residual_layer = Residual_group(res_channels=res_channels,
                                             skip_channels=skip_channels,
                                             num_res_layers=num_res_layers,
                                             diffusion_step_embed_dim_in=diffusion_step_embed_dim_in,
                                             diffusion_step_embed_dim_mid=diffusion_step_embed_dim_mid,
                                             diffusion_step_embed_dim_out=diffusion_step_embed_dim_out,
                                             in_channels=in_channels,
                                             s4_lmax=s4_lmax,
                                             s4_d_state=s4_d_state,
                                             s4_dropout=s4_dropout,
                                             s4_bidirectional=s4_bidirectional,
                                             s4_layernorm=s4_layernorm)
        # convert the dimension from (B,skip_channels,L) to (B,out_channels,L)
        self.final_conv1 = tf.keras.layers.Conv1D(filters=skip_channels, kernel_size=1, padding='SAME',
                                                  use_bias=False, kernel_initializer='he_normal', data_format='channels_first')
        self.final_conv2 = tf.keras.layers.Conv1D(filters=out_channels, kernel_size=1, padding='SAME',
                                                  use_bias=False, kernel_initializer='zeros', data_format='channels_first')

    def call(self, input_data):
        x, conditional, mask, diffusion_steps = input_data
        conditional = conditional * mask
        conditional = tf.concat([conditional, tf.cast(mask, dtype=tf.float32)], axis=1)
        x = tf.nn.relu(self.init_conv(x))
        x = self.residual_layer((x, conditional, diffusion_steps))
        y = tf.nn.relu(self.final_conv1(x))
        y = tf.nn.relu(self.final_conv2(y))
        return y
def train_step(X, y, net, loss_fn, optimizer):
    with tf.GradientTape() as tape:
        logits = net(X)
        loss_value = loss_fn(y, logits)
    grads = tape.gradient(loss_value, net.trainable_variables, unconnected_gradients=tf.UnconnectedGradients.ZERO)
    optimizer.apply_gradients(zip(grads, net.trainable_variables))
    return loss_value.numpy()

if __name__ == '__main__':
    model_config = {'in_channels': 12, 'out_channels': 12, 'num_res_layers': 36, 'res_channels': 256, 'skip_channels': 256,
                    'diffusion_step_embed_dim_in': 128, 'diffusion_step_embed_dim_mid': 512, 'diffusion_step_embed_dim_out': 512,
                    's4_lmax': 250, 's4_d_state': 64, 's4_dropout': 0.0, 's4_bidirectional': 1, 's4_layernorm': 1}
    net = SSSDS4Imputer(**model_config)
    # define optimizer
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    # training
    n_iter = 0
    #iterator = iter(dataset)
    while n_iter < 150000 + 1:
        #try:
        X = (tf.random.normal([8, 12, 248], 0, 1, tf.float32, seed=1), tf.random.normal([8, 12, 248], 0, 1, tf.float32, seed=2),
             tf.random.normal([8, 12, 248], 0, 1, tf.float32, seed=2), tf.random.normal([8, 1], 0, 1, tf.float32, seed=4))
        y = tf.random.normal([8, 12, 248], 0, 1, tf.float32, seed=1)
        t0 = time.time()
        loss = train_step(X, y, net, tf.keras.losses.MeanSquaredError(), optimizer)
        print(time.time() - t0)
        n_iter += 1
The TensorFlow version I use is 2.4.1.
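One thing worth trying (an assumption on my part, not a verified diagnosis): the train_step above runs eagerly, and per-op dispatch overhead is a common reason TF2 custom training loops are slower than equivalent PyTorch code. A minimal sketch of the same step compiled with tf.function, assuming nothing in the model requires eager mode; the loss object should also be created once outside the loop, since passing a fresh Python object on every call forces retracing:
mse = tf.keras.losses.MeanSquaredError()  # create once, outside the training loop

@tf.function  # trace the step into a graph instead of running op-by-op
def train_step(X, y, net, loss_fn, optimizer):
    with tf.GradientTape() as tape:
        logits = net(X)
        loss_value = loss_fn(y, logits)
    grads = tape.gradient(loss_value, net.trainable_variables,
                          unconnected_gradients=tf.UnconnectedGradients.ZERO)
    optimizer.apply_gradients(zip(grads, net.trainable_variables))
    return loss_value  # stays a tensor; .numpy() is unavailable inside tf.function

# usage in the loop: loss = train_step(X, y, net, mse, optimizer).numpy()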

Multi-input TF model using TFRecord datasets

I’m trying to create a multi-input, single-output model in TensorFlow.
I load the data from TFRecs using the get_batched_dataset fn.
def get_batched_dataset(filenames, batch_size):
    dataset = (
        tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTO)
        .map(parse_tfrecord_fn, num_parallel_calls=AUTO)
        .map(prepare_sample, num_parallel_calls=AUTO)
        .batch(batch_size)
    )
    return dataset
In the above fn, I do some preprocessing with the loaded data from TFRecs using the prepare_sample fn.
def prepare_sample(features):
    image = features['image']
    w = tf.shape(image)[0]
    h = tf.shape(image)[1]
    # some type of preprocessing/data augmentation/transforms
    x = {'l_eye': l_eye, 'r_eye': r_eye, 'kps': kps}  # l_eye & r_eye are images, kps is numerical data
    y = out
    return x, y
Below is a very small version of how I’m trying to code my model architecture, just to get an idea.
class cnn_model(layers.Layer):
    def __init__(self, name='cnn-model'):
        super(cnn_model, self).__init__()
        self.conv1 = layers.Conv2D(32, kernel_size=7, strides=2, padding='valid')
        self.conv2 = layers.Conv2D(64, kernel_size=5, strides=2, padding='valid')
        self.conv3 = layers.Conv2D(128, kernel_size=3, strides=1, padding='valid')
        self.bn1 = layers.BatchNormalization(axis=-1, momentum=0.9)
        self.bn2 = layers.BatchNormalization(axis=-1, momentum=0.9)
        self.bn3 = layers.BatchNormalization(axis=-1, momentum=0.9)
        self.leakyrelu = layers.LeakyReLU(alpha=0.01)
        self.avgpool = layers.AveragePooling2D(pool_size=2)
        self.dropout = layers.Dropout(rate=0.02)

    def call(self, input_image):
        x = self.conv1(input_image)
        x = self.bn1(x)
        x = self.leakyrelu(x)
        x = self.avgpool(x)
        x = self.dropout(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.leakyrelu(x)
        x = self.avgpool(x)
        x = self.dropout(x)
        x = self.conv3(x)
        x = self.bn3(x)
        x = self.leakyrelu(x)
        x = self.avgpool(x)
        x = self.dropout(x)
        return x

class num_model(layers.Layer):
    def __init__(self, name='num-model'):
        super(num_model, self).__init__()
        self.dense1 = layers.Dense(128)
        self.dense2 = layers.Dense(16)

    def call(self, input_keypoints):
        x = self.dense1(input_keypoints)
        x = self.dense2(x)
        return x

class main_model(Model):
    def __init__(self, name='main-model'):
        super(main_model, self).__init__()
        self.cnn_model = cnn_model()
        self.num_model = num_model()
        self.dense1 = layers.Dense(8)
        self.dense2 = layers.Dense(2)

    def call(self, input_l_r_kps):
        leftEye, rightEye, kps = input_l_r_kps['l_eye'], input_l_r_kps['r_eye'], input_l_r_lms['kps']
        l_eye_feat = tf.reshape(self.cnn_model(leftEye), (1, 3*128*128))
        r_eye_feat = tf.reshape(self.cnn_model(rightEye), (1, 3*128*128))
        kp_feat = self.num_model(kps)
        combined_feat = tf.concat((l_eye_feat, r_eye_feat, lm_feat), 1)
        x = self.dense1(combined_feat)
        x = self.dense2(x)
        return x
Now, the dataset returned by the get_batched_dataset fn is what I’ll be feeding into the Keras model.fit method.
train_dataset = get_batched_dataset('train.tfrec', batch_size)
valid_dataset = get_batched_dataset('valid.tfrec', batch_size)

model.fit(
    x=train_dataset,
    batch_size=batch_size,
    epochs=1,
    validation_data=valid_dataset,
    use_multiprocessing=False
)
Can you please guide me where I’m going wrong? Is it in the prepare_sample fn by returning x as a dict, or somewhere in the model code? I’m really new to TF and confused.
Any help appreciated!
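Not a full answer, but two things stand out from the snippet alone (assumptions, since no traceback is shown): returning x as a dict from prepare_sample is fine as long as the keys match what call unpacks, and main_model.call references input_l_r_lms and lm_feat, which are never defined (the parameter is input_l_r_kps and the variable is kp_feat). A sketch of call with consistent names, flattening per sample instead of hard-coding a batch size of 1:
    def call(self, inputs):
        # keys must match the dict returned by prepare_sample
        leftEye, rightEye, kps = inputs['l_eye'], inputs['r_eye'], inputs['kps']
        batch = tf.shape(leftEye)[0]
        # flatten each sample's CNN features, keeping the batch dimension
        l_eye_feat = tf.reshape(self.cnn_model(leftEye), (batch, -1))
        r_eye_feat = tf.reshape(self.cnn_model(rightEye), (batch, -1))
        kp_feat = self.num_model(kps)
        combined_feat = tf.concat((l_eye_feat, r_eye_feat, kp_feat), 1)
        x = self.dense1(combined_feat)
        return self.dense2(x)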

Keras Model works w/ 3 inputs but not 4

I'm trying to build a VAE for some time series data, but am having a hard time getting the model to work with 4 inputs instead of 3, and I'm not sure what's causing the problem.
Here's the complete code that I have:
# data for each time series
import yfinance as yf
import tensorflow as tf
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from tensorflow.keras import layers
# load in the data
msft = yf.Ticker('MSFT').history(period = '5y')[['Close']]
googl = yf.Ticker('GOOGL').history(period = '5y')[['Close']]
amzn = yf.Ticker('AMZN').history(period = '5y')[['Close']]
vals = np.sin(np.linspace(-100, 100, msft.shape[0]))[:, None]
# scale the data for numeric stability
msft = StandardScaler().fit_transform(msft)
googl = StandardScaler().fit_transform(googl)
amzn = StandardScaler().fit_transform(amzn)
# global variables
latent_dim = 2
batch_size = 32
sequence_length = 30
# build time series samplers for each time series
c1 = keras.utils.timeseries_dataset_from_array(
    msft,
    targets=None,
    sequence_length=sequence_length
)
c2 = keras.utils.timeseries_dataset_from_array(
    googl,
    targets=None,
    sequence_length=sequence_length
)
c3 = keras.utils.timeseries_dataset_from_array(
    amzn,
    targets=None,
    sequence_length=sequence_length
)
c4 = keras.utils.timeseries_dataset_from_array(
    vals,
    targets=None,
    sequence_length=sequence_length
)
# add the encoder for the sine wave
sin_inputs = keras.layers.Input(shape=(sequence_length, 1))
# stack two lstm layers
sx = layers.LSTM(64, return_sequences = True)(sin_inputs)
sx = layers.LSTM(64)(sx)
# build the encoders for each of the separate time series
msft_inputs = layers.Input(shape=(sequence_length, 1))
# stack two lstm layers
mx = layers.LSTM(64, return_sequences = True)(msft_inputs)
mx = layers.LSTM(64)(mx)
# now for google
googl_inputs = layers.Input(shape=(sequence_length, 1))
gx = layers.LSTM(64, return_sequences = True)(googl_inputs)
gx = layers.LSTM(64)(gx)
# and for amazon
amzn_inputs = layers.Input(shape = (sequence_length, 1))
ax = layers.LSTM(64, return_sequences = True)(amzn_inputs)
ax = layers.LSTM(64)(ax)
# now combine them together for a single joint time series!
x = layers.Concatenate()([mx, gx, ax, sx])
# pass into a dense layer
x = layers.Dense(64, activation = 'relu')(x)
# and finally pass them into the final decoder!
z_mean = layers.Dense(latent_dim, name = 'z_mean')(x)
z_logvar = layers.Dense(latent_dim, name = 'z_logvar')(x)
encoder = keras.Model([msft_inputs, googl_inputs, amzn_inputs, sin_inputs], [z_mean, z_logvar], name = 'encoder')
class Sampler(layers.Layer):
    def call(self, z_mean, z_logvar):
        batch_size = tf.shape(z_mean)[0]
        n_dims = tf.shape(z_mean)[1]
        epsilon = tf.random.normal(shape=(batch_size, n_dims))
        return z_mean + tf.exp(0.5 * z_logvar) * epsilon
latent_inputs = keras.Input(shape=(latent_dim,))
dec = layers.RepeatVector(sequence_length)(latent_inputs)
dec = layers.LSTM(64, return_sequences=True)(dec)
out = layers.TimeDistributed(layers.Dense(1))(dec)
decoder = keras.Model(latent_inputs, out)
class VAE(keras.Model):
    def __init__(self, encoder, decoder, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.sampler = Sampler()
        self.total_loss_tracker = keras.metrics.Mean(name='total_loss')
        self.reconstruction_loss_tracker = keras.metrics.Mean(name='reconstruction_loss')
        self.kl_loss_tracker = keras.metrics.Mean(name='kl_loss')

    @property
    def metrics(self):
        return [self.total_loss_tracker,
                self.reconstruction_loss_tracker,
                self.kl_loss_tracker]

    def train_step(self, data):
        with tf.GradientTape() as tape:
            z_mean, z_logvar = self.encoder(data)
            z = self.sampler(z_mean, z_logvar)
            reconstruction = decoder(z)
            reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(
                    keras.losses.binary_crossentropy(data, reconstruction),
                    axis=(1, 2)
                )
            )
            kl_loss = -0.5 * (1 + z_logvar - tf.square(z_mean) - tf.exp(z_logvar))
            total_loss = reconstruction_loss + tf.reduce_mean(kl_loss)
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "total_loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }
When I fit this model I get the following error:
ValueError: Data is expected to be in format `x`, `(x,)`, `(x, y)`, or `(x, y, sample_weight)`, found: (<tf.Tensor: shape=(128, 30, 1),
My issue is that this exact same model works when I only have 3 inputs instead of 4.
If I replace the lines where I specify the inputs everything seems to work fine:
x = layers.Concatenate()([mx, gx, sx])
encoder = keras.Model([msft_inputs, googl_inputs, amzn_inputs], [z_mean, z_logvar], name = 'encoder')
vae.fit(zip(c1.repeat(), c2.repeat(), c3.repeat()), epochs = 10, steps_per_epoch = 10)
So I'm curious about what it is about my setup that is causing my model to break when I add the fourth input.
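A hedged guess at the cause (based on the error text, not verified against this exact code): Keras unpacks each element a dataset or generator yields as x, (x,), (x, y), or (x, y, sample_weight), so a 3-tuple from zip happens to match one of the accepted formats, while a 4-tuple matches none of them, which is exactly what the ValueError says. Packing the four series into a single nested tuple keeps them together as x; a sketch:
train_ds = tf.data.Dataset.zip((c1.repeat(), c2.repeat(), c3.repeat(), c4.repeat()))
# wrap the four tensors in one tuple so Keras treats them as a single x
train_ds = train_ds.map(lambda a, b, c, d: ((a, b, c, d),))
vae.fit(train_ds, epochs=10, steps_per_epoch=10)
Inside train_step, data then arrives as a 1-tuple, so unpack it with x = data[0] before calling self.encoder(x).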

ValueError: Dimensions must be equal, but are 2 and 1 in time2vec example

I have 2 inputs and 4 outputs. I want to use time2vec to predict the outputs. I have used the code in https://towardsdatascience.com/time2vec-for-time-series-features-encoding-a03a4f3f937e; it works for one input and one output. But when I want to use it for 2 inputs and 4 outputs, it gives me the following error:
import numpy as np
import tensorflow as tf
from keras.layers import Dense, Dropout, Activation, Flatten, LSTM, Embedding, Input, concatenate, Lambda
from sklearn.preprocessing import MinMaxScaler
from keras.callbacks import EarlyStopping
import keras
import random
import os
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.optimizers import *
from tensorflow.keras import backend as K
from kerashypetune import KerasGridSearch
import matplotlib.pyplot as plt

w = 5
ts = 10
nt = 10
ntest = nt + int(percent*nt)

X_train = np.random.rand(90, 5, 2)
X_test = np.random.rand(5, 5, 2)
y_train = np.random.rand(90, 4)
y_test = np.random.rand(5, 4)
""" ### DEFINE T2V LAYER ###
class T2V(Layer):
def __init__(self, output_dim=None, **kwargs):
self.output_dim = output_dim
super(T2V, self).__init__(**kwargs)
def build(self, input_shape):
self.W = self.add_weight(name='W', shape=(1, self.output_dim), initializer='uniform',
trainable=True)
self.P = self.add_weight(name='P',shape=(1,
self.output_dim),initializer='uniform',trainable=True)
self.w = self.add_weight(name='w',shape=(1, 1),initializer='uniform', trainable=True)
self.p = self.add_weight(name='p',shape=(1, 1),initializer='uniform',trainable=True)
super(T2V, self).build(input_shape)
def call(self, x):
original = self.w * x + self.p
sin_trans = K.sin(K.dot(x, self.W) + self.P)
return K.concatenate([sin_trans, original], -1)
### CREATE GENERATOR FOR LSTM AND T2V ###
sequence_length = w

def gen_sequence(id_df, seq_length, seq_cols):
    data_matrix = id_df[seq_cols].values
    num_elements = data_matrix.shape[0]
    for start, stop in zip(range(0, num_elements-seq_length), range(seq_length, num_elements)):
        yield data_matrix[start:stop, :]

def gen_labels(id_df, seq_length, label):
    data_matrix = id_df[label].values
    num_elements = data_matrix.shape[0]
    return data_matrix[seq_length:num_elements, :]
### DEFINE MODEL STRUCTURES ###
def set_seed_TF2(seed):
    tf.random.set_seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    random.seed(seed)

def T2V_NN(param, dim):
    inp = Input(shape=(dim, 2))
    x = T2V(param['t2v_dim'])(inp)
    x = LSTM(param['unit'], activation=param['act'])(x)
    x = Dense(2)(x)
    m = Model(inp, x)
    m.compile(loss='mse', optimizer=Adam(lr=param['lr']))
    return m

def NN(param, dim):
    inp = Input(shape=(dim, 2))
    x = LSTM(param['unit'], activation=param['act'])(inp)
    x = Dense(2)(x)
    m = Model(inp, x)
    m.compile(loss='mse', optimizer=Adam(lr=param['lr']))
    return m
### PARAM GRID ###
param_grid = {'unit': [64,32], 't2v_dim': [128,64], 'lr': [1e-2,1e-3], 'act': ['elu','relu'], 'epochs': 1, 'batch_size': [512,1024]}

### FIT T2V + LSTM ###
es = EarlyStopping(patience=5, verbose=0, min_delta=0.001, monitor='val_loss', mode='auto',
                   restore_best_weights=True)
hypermodel = lambda x: T2V_NN(param=x, dim=sequence_length)
kgs_t2v = KerasGridSearch(hypermodel, param_grid, monitor='val_loss', greater_is_better=False,
                          tuner_verbose=1)
kgs_t2v.set_seed(set_seed_TF2, seed=33)
kgs_t2v.search(X_train, y_train, validation_split=0.2, callbacks=[es], shuffle=False)
But when I run the model, I get this error:
ValueError: Dimensions must be equal, but are 2 and 1 for '{{node t2v_2/MatMul}} = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false](t2v_2/Reshape, t2v_2/Reshape_1)' with input shapes: [?,2], [1,128].
Could you help me to solve this?
You have to change the weight shapes inside the T2V layer, and the output layers of your network, in order to correctly match the shapes: the original layer hard-codes weights of shape (1, output_dim), which only fits a single input feature, so with 2 features the MatMul of [?,2] by [1,128] fails. Deriving the shapes from input_shape fixes this:
class T2V(Layer):
    def __init__(self, output_dim=None, **kwargs):
        self.output_dim = output_dim
        super(T2V, self).__init__(**kwargs)

    def build(self, input_shape):
        self.W = self.add_weight(name='W', shape=(input_shape[-1], self.output_dim),
                                 initializer='uniform', trainable=True)
        self.P = self.add_weight(name='P', shape=(input_shape[1], self.output_dim),
                                 initializer='uniform', trainable=True)
        self.w = self.add_weight(name='w', shape=(input_shape[1], 1),
                                 initializer='uniform', trainable=True)
        self.p = self.add_weight(name='p', shape=(input_shape[1], 1),
                                 initializer='uniform', trainable=True)
        super(T2V, self).build(input_shape)

    def call(self, x):
        original = self.w * x + self.p
        sin_trans = K.sin(K.dot(x, self.W) + self.P)
        return K.concatenate([sin_trans, original], -1)
Create a dummy example:
n_sample = 90
timesteps = 5
feat_inp = 2
feat_out = 4

X = np.random.uniform(0, 1, (n_sample, timesteps, feat_inp))
y = np.random.uniform(0, 1, (n_sample, feat_out))

def T2V_NN():
    inp = Input(shape=(timesteps, feat_inp))
    x = T2V(32)(inp)
    x = LSTM(8)(x)
    x = Dense(feat_out)(x)
    m = Model(inp, x)
    m.compile(loss='mse', optimizer='adam')
    return m

model = T2V_NN()
model.fit(X, y, epochs=3)

Unhashable Type TypeError: Tensors are unhashable. Instead, use tensor.ref() as the key

I am trying to implement a custom variational autoencoder. Following is the code to reproduce.
epsilon_std = 1.0

vx = tf.keras.layers.Input(batch_shape=(None, max_length_output), name='vae_enc_in')
vx_emb = tf.keras.layers.Embedding(
    vocab_tar_size,
    embedding_dim,
    input_length=max_length_output,
    name='vae_enc_emb'
)(vx)
vxbi = tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(units, return_sequences=False, recurrent_dropout=0.2, name='vae_enc_lstm'), merge_mode='concat'
)(vx_emb)
vx_drop = tf.keras.layers.Dropout(0.2, name='vae_enc_drop')(vxbi)
vx_dense = tf.keras.layers.Dense(units, activation='linear', name='vae_enc_dense')(vx_drop)
vx_elu = tf.keras.layers.ELU(name='vae_enc_elu')(vx_dense)
vx_drop1 = tf.keras.layers.Dropout(0.2, name='vae_enc_drop2')(vx_elu)
z_mean = tf.keras.layers.Dense(20, name='vae_enc_dense2')(vx_drop1)
z_log_var = tf.keras.layers.Dense(20, name='vae_enc_dense3')(vx_drop1)

def sampling(args):
    z_mean, z_log_var = args
    epsilon = tf.random.normal(shape=(BATCH_SIZE, 20), mean=0.,
                               stddev=epsilon_std)
    return z_mean + tf.math.exp(z_log_var / 2) * epsilon

z = tf.keras.layers.Lambda(sampling, output_shape=(20,), name='vae_lambda')([z_mean, z_log_var])
repeated_context = tf.keras.layers.RepeatVector(max_length_output, name='vae_repeat')
decoder_h = tf.keras.layers.LSTM(units, return_sequences=True, recurrent_dropout=0.2, name='vae_dec_lstm')
decoder_mean = tf.keras.layers.TimeDistributed(
    tf.keras.layers.Dense(vocab_tar_size, activation='linear', name='vae_dec_lstm'),
    name='vae_dec_time_dist'
)
h_decoded = decoder_h(repeated_context(z))
x_decoded_mean = decoder_mean(h_decoded)

def zero_loss(y_true, y_pred):
    print("ZERO LOSS")
    return tf.zeros_like(y_pred)
And then I create a custom VAE layer:
class VAELayer(tf.keras.layers.Layer):
    def __init__(self, batch_size, max_len, **kwargs):
        self.is_placeholder = True
        super(VAELayer, self).__init__(**kwargs)
        self.target_weights = tf.constant(np.ones((batch_size, max_len)), tf.float32)

    def vae_loss(self, x, x_decoded_mean):
        #xent_loss = K.sum(metrics.categorical_crossentropy(x, x_decoded_mean), axis=-1)
        labels = tf.cast(x, tf.int32)
        xent_loss = tf.math.reduce_sum(
            tfa.seq2seq.sequence_loss(
                x_decoded_mean,
                labels,
                weights=self.target_weights,
                average_across_timesteps=False,
                average_across_batch=False
            ),
            axis=-1
        )
        #softmax_loss_function=softmax_loss_f), axis=-1)#, for sampled softmax
        kl_loss = -0.5 * tf.math.reduce_sum(1 + z_log_var - tf.math.square(z_mean) - tf.math.exp(z_log_var), axis=-1)
        return tf.math.reduce_mean(xent_loss + kl_loss)

    def call(self, inputs):
        x = inputs[0]
        x_decoded_mean = inputs[1]
        print(x.shape, x_decoded_mean.shape)
        loss = self.vae_loss(x, x_decoded_mean)
        print("Adding loss")
        self.add_loss(loss, inputs=inputs)
        print("Returning ones like")
        return tf.ones_like(x)
I compiled it successfully and also produced a test output by calling the model. But when I try to train it, it produces the error:
TypeError: Tensors are unhashable. (KerasTensor(type_spec=TensorSpec(shape=(), dtype=tf.float32, name=None), name='tf.math.reduce_sum_25/Sum:0', description="created by layer 'tf.math.reduce_sum_25'"))Instead, use tensor.ref() as the key.
Following is the code for compiling and fitting the model:
loss_layer = VAELayer(BATCH_SIZE, max_length_output)([vx, x_decoded_mean])
vae = tf.keras.Model(vx, [loss_layer], name='VariationalAutoEncoderLayer')
opt = tf.keras.optimizers.Adam(lr=0.01)  #SGD(lr=1e-2, decay=1e-6, momentum=0.9, nesterov=True)
vae.compile(optimizer=opt, loss=[zero_loss])

def vae_sentence_generator():
    for ip, tg in train_dataset:
        yield tg.numpy()

vae.fit(vae_sentence_generator(), steps_per_epoch=steps_per_epoch, epochs=10)
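A hedged guess at the cause (not verified): vae_loss closes over z_mean and z_log_var, which are symbolic tensors from the functional graph outside the layer, and mixing those graph tensors with the tensors flowing through call is a known trigger for this TypeError in TF 2.x. Passing them into the layer as inputs avoids the closure; a sketch:
class VAELayer(tf.keras.layers.Layer):
    def __init__(self, batch_size, max_len, **kwargs):
        super(VAELayer, self).__init__(**kwargs)
        self.target_weights = tf.constant(np.ones((batch_size, max_len)), tf.float32)

    def call(self, inputs):
        # take z_mean / z_log_var as layer inputs instead of closing over graph tensors
        x, x_decoded_mean, z_mean, z_log_var = inputs
        labels = tf.cast(x, tf.int32)
        xent_loss = tf.math.reduce_sum(
            tfa.seq2seq.sequence_loss(
                x_decoded_mean, labels,
                weights=self.target_weights,
                average_across_timesteps=False,
                average_across_batch=False),
            axis=-1)
        kl_loss = -0.5 * tf.math.reduce_sum(
            1 + z_log_var - tf.math.square(z_mean) - tf.math.exp(z_log_var), axis=-1)
        self.add_loss(tf.math.reduce_mean(xent_loss + kl_loss))
        return tf.ones_like(x)

loss_layer = VAELayer(BATCH_SIZE, max_length_output)([vx, x_decoded_mean, z_mean, z_log_var])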