Tensor shapes for FFJORD bijector - tensorflow

I want to fit an FFJORD bijector to transform a two-dimensional dataset. The code is below (it is a simplified version of my original code, but has the same problem).
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

tfb = tfp.bijectors
tfd = tfp.distributions

class ODE(tf.keras.layers.Layer):
    def __init__(self):
        super(ODE, self).__init__()
        self.dense_layer1 = tf.keras.layers.Dense(4, activation='tanh')
        self.dense_layer2 = tf.keras.layers.Dense(2)

    def call(self, t, inputs):
        return self.dense_layer2(self.dense_layer1(inputs))

ode = ODE()
ffjord = tfb.FFJORD(state_time_derivative_fn=ode)
base_distr = tfd.MultivariateNormalDiag(loc=tf.zeros(2), scale_diag=tf.ones(2))
td = tfd.TransformedDistribution(distribution=base_distr, bijector=ffjord)

x = tf.keras.Input(shape=(2,), dtype=tf.float32)
log_prob = td.log_prob(x)
model = tf.keras.Model(x, log_prob)

def NLL(y, log_prob):
    return -log_prob

model.compile(optimizer=tf.optimizers.Adam(1.0e-2), loss=NLL)
history = model.fit(x=X_train, y=np.zeros(X_train.shape[0]), epochs=100, verbose=0, batch_size=128)
I get an error on the line log_prob = td.log_prob(x): ValueError: Cannot convert a partially known TensorShape to a Tensor: (None, 2)
If I try to draw a sample from the transformed distribution with td.sample(), it produces another error, but td.sample(1) works, as do some other calls, for example:
x = tf.constant([[2.0, 3.0]])
ode(-1.0, x)
ffjord.inverse(x)
ffjord.forward(x)
td.log_prob(td.sample(5))
I guess there is some problem with shapes, but I can't understand where it is.
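Since calls on concrete tensors such as td.log_prob(td.sample(5)) work, one way to sidestep the symbolic (None, 2) shape entirely is to drop the keras.Input/keras.Model wrapper and minimize the negative log-likelihood in a custom training loop. A sketch (untested against this exact setup; train_step is my own helper name, and I assume X_train is an (N, 2) float32 array):

optimizer = tf.optimizers.Adam(1.0e-2)
train_ds = tf.data.Dataset.from_tensor_slices(X_train.astype('float32')).batch(128)

@tf.function
def train_step(batch):
    # batch has a concrete shape here, so td.log_prob never sees (None, 2)
    with tf.GradientTape() as tape:
        loss = -tf.reduce_mean(td.log_prob(batch))
    grads = tape.gradient(loss, ode.trainable_variables)
    optimizer.apply_gradients(zip(grads, ode.trainable_variables))
    return loss

for epoch in range(100):
    for batch in train_ds:
        loss = train_step(batch)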

Related

Create Multi-Output Model with KerasNLP (CLS + MLM)

I am trying to use Keras_NLP to pretrain a model through both Masked Language Modelling and using a proxy for the CLS token to classify certain labels. With just Masked Language Modelling it works fine, but when I try to make it multi-output and also classify labels using the CLS token, it fails.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
!pip install -q keras-nlp
import keras_nlp
import numpy as np

# Creating random data
x = np.random.randint(3, 100, size=(1000, 140))
# Adding int 2 at the front of each vector to act as CLS
x_cls = np.hstack([np.full((1000, 1), 2), x])
y = np.random.randint(0, 3, size=(1000, 1))  # random class for each x value
# Need data in dataset format for the actual problem, so using it here
dataset = tf.data.Dataset.from_tensor_slices((x_cls, y)).batch(32)

# Keras masker
masker = keras_nlp.layers.MLMMaskGenerator(99, 0.15, mask_token_id=0, unselectable_token_ids=[0, 1, 2], mask_selection_length=15)

def process(input, y):
    outputs = masker(input)
    features = {
        "tokens": outputs["tokens"],
        "mask_positions": outputs["mask_positions"],
    }
    labels = outputs["mask_ids"]
    weights = outputs["mask_weights"]
    return features, labels, weights, y

# Apply mask to dataset
dataset_mask = dataset.map(lambda x, y: process(x, y))

# Create simple encoder, for testing purposes
input = keras.Input(shape=(141,))
embedding = layers.Embedding(99, 20)(input)
x = layers.Dense(20, activation="relu")(embedding)
encoder = keras.Model(inputs=input, outputs=x)

# MLM prediction + CLS prediction
inputs = {
    "tokens": keras.Input(shape=(141,), dtype=tf.int32),
    "mask_positions": keras.Input(shape=(15,), dtype=tf.int32),
}
encoded_tokens = encoder(inputs["tokens"])
outputs = keras_nlp.layers.MLMHead(vocabulary_size=141, activation="softmax")(encoded_tokens, inputs["mask_positions"])
# Use a Lambda layer to extract the embedding at position 0, which corresponds to CLS
x = layers.Lambda(lambda x: x[:, 0, :])(encoded_tokens)
output_2 = layers.Dense(3, activation="softmax")(x)
pretraining_model = keras.Model(inputs, outputs=[outputs, output_2])
pretraining_model.compile(loss=["sparse_categorical_crossentropy", "sparse_categorical_crossentropy"], optimizer="Adam", weighted_metrics=["sparse_categorical_accuracy"], jit_compile=True)
pretraining_model.fit(dataset_mask)
The error I get is
ValueError: Data is expected to be in format `x`, `(x,)`, `(x, y)`, or `(x, y, sample_weight)`, found: ({'tokens': <tf.Tensor 'data_1:0' shape=(None, 141) dtype=int64>, 'mask_positions': <tf.Tensor 'data:0' shape=(None, 15) dtype=int64>}, <tf.Tensor 'data_2:0' shape=(None, 15) dtype=int64>, <tf.Tensor 'data_3:0' shape=(None, 15) dtype=float32>, <tf.Tensor 'data_4:0' shape=(None, 1) dtype=int64>)
Any help would be appreciated!
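tf.keras only accepts dataset elements in the form x, (x,), (x, y), or (x, y, sample_weight), which is exactly what the error says: the four-element tuple (features, labels, weights, y) has no valid interpretation. One option is to pack one label and one weight per model output, so the element stays a three-tuple. A sketch (untested; it assumes the MLM head is the first output and the CLS head the second):

def process(inputs, y):
    outputs = masker(inputs)
    features = {
        "tokens": outputs["tokens"],
        "mask_positions": outputs["mask_positions"],
    }
    # one entry per model output: (MLM labels, CLS label)
    labels = (outputs["mask_ids"], y)
    # weight the MLM loss by the mask weights; weight every CLS sample equally
    weights = (outputs["mask_weights"], tf.ones_like(y, dtype=tf.float32))
    return features, labels, weights

dataset_mask = dataset.map(process)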

Keras Model works w/ 3 inputs but not 4

I'm trying to build a VAE for some time series data, but am having a hard time getting the model to work with 4 inputs instead of 3, and I'm not sure what's causing the problem.
Here's the complete code that I have:
# data for each time series
import yfinance as yf
import tensorflow as tf
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from tensorflow.keras import layers

# load in the data
msft = yf.Ticker('MSFT').history(period='5y')[['Close']]
googl = yf.Ticker('GOOGL').history(period='5y')[['Close']]
amzn = yf.Ticker('AMZN').history(period='5y')[['Close']]
vals = np.sin(np.linspace(-100, 100, msft.shape[0]))[:, None]

# scale the data for numeric stability
msft = StandardScaler().fit_transform(msft)
googl = StandardScaler().fit_transform(googl)
amzn = StandardScaler().fit_transform(amzn)

# global variables
latent_dim = 2
batch_size = 32
sequence_length = 30

# build time series samplers for each time series
c1 = keras.utils.timeseries_dataset_from_array(msft, targets=None, sequence_length=sequence_length)
c2 = keras.utils.timeseries_dataset_from_array(googl, targets=None, sequence_length=sequence_length)
c3 = keras.utils.timeseries_dataset_from_array(amzn, targets=None, sequence_length=sequence_length)
c4 = keras.utils.timeseries_dataset_from_array(vals, targets=None, sequence_length=sequence_length)

# add the encoder for the sine wave
sin_inputs = keras.layers.Input(shape=(sequence_length, 1))
# stack two lstm layers
sx = layers.LSTM(64, return_sequences=True)(sin_inputs)
sx = layers.LSTM(64)(sx)

# build the encoders for each of the separate time series
msft_inputs = layers.Input(shape=(sequence_length, 1))
# stack two lstm layers
mx = layers.LSTM(64, return_sequences=True)(msft_inputs)
mx = layers.LSTM(64)(mx)

# now for google
googl_inputs = layers.Input(shape=(sequence_length, 1))
gx = layers.LSTM(64, return_sequences=True)(googl_inputs)
gx = layers.LSTM(64)(gx)

# and for amazon
amzn_inputs = layers.Input(shape=(sequence_length, 1))
ax = layers.LSTM(64, return_sequences=True)(amzn_inputs)
ax = layers.LSTM(64)(ax)

# now combine them together for a single joint time series!
x = layers.Concatenate()([mx, gx, ax, sx])
# pass into a dense layer
x = layers.Dense(64, activation='relu')(x)
# and finally pass them into the final decoder!
z_mean = layers.Dense(latent_dim, name='z_mean')(x)
z_logvar = layers.Dense(latent_dim, name='z_logvar')(x)
encoder = keras.Model([msft_inputs, googl_inputs, amzn_inputs, sin_inputs], [z_mean, z_logvar], name='encoder')
class Sampler(layers.Layer):
    def call(self, z_mean, z_logvar):
        batch_size = tf.shape(z_mean)[0]
        n_dims = tf.shape(z_mean)[1]
        epsilon = tf.random.normal(shape=(batch_size, n_dims))
        return z_mean + tf.exp(0.5 * z_logvar) * epsilon

latent_inputs = keras.Input(shape=(latent_dim,))
dec = layers.RepeatVector(sequence_length)(latent_inputs)
dec = layers.LSTM(64, return_sequences=True)(dec)
out = layers.TimeDistributed(layers.Dense(1))(dec)
decoder = keras.Model(latent_inputs, out)

class VAE(keras.Model):
    def __init__(self, encoder, decoder, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.sampler = Sampler()
        self.total_loss_tracker = keras.metrics.Mean(name='total_loss')
        self.reconstruction_loss_tracker = keras.metrics.Mean(name='reconstruction_loss')
        self.kl_loss_tracker = keras.metrics.Mean(name='kl_loss')

    @property
    def metrics(self):
        return [self.total_loss_tracker,
                self.reconstruction_loss_tracker,
                self.kl_loss_tracker]

    def train_step(self, data):
        with tf.GradientTape() as tape:
            z_mean, z_logvar = self.encoder(data)
            z = self.sampler(z_mean, z_logvar)
            reconstruction = self.decoder(z)
            reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(
                    keras.losses.binary_crossentropy(data, reconstruction),
                    axis=(1, 2)
                )
            )
            kl_loss = -0.5 * (1 + z_logvar - tf.square(z_mean) - tf.exp(z_logvar))
            total_loss = reconstruction_loss + tf.reduce_mean(kl_loss)
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "total_loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

vae = VAE(encoder, decoder)
vae.compile(optimizer=keras.optimizers.Adam(), run_eagerly=False)
vae.fit(zip(c1.repeat(), c2.repeat(), c3.repeat(), c4.repeat()), epochs=10, steps_per_epoch=10)
When I fit this model I get the following error:
ValueError: Data is expected to be in format `x`, `(x,)`, `(x, y)`, or `(x, y, sample_weight)`, found: (<tf.Tensor: shape=(128, 30, 1),
My issue is that this exact same model works when I only have 3 inputs instead of 4.
If I replace the lines where I specify the inputs, everything seems to work fine:
x = layers.Concatenate()([mx, gx, sx])
encoder = keras.Model([msft_inputs, googl_inputs, amzn_inputs], [z_mean, z_logvar], name = 'encoder')
vae.fit(zip(c1.repeat(), c2.repeat(), c3.repeat()), epochs = 10, steps_per_epoch = 10)
So I'm curious about what it is about my setup that is causing my model to break when I add the fourth input.
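The culprit is Python's built-in zip: it yields plain tuples of batches, and tf.keras interprets a top-level tuple as (x, y, sample_weight). With three datasets the 3-tuple happens to match that format, so validation passes and your custom train_step receives all three batches; a 4-tuple has no valid interpretation, hence the error. A sketch of one fix (untested): zip at the tf.data level and nest the four series inside a single-element tuple, so the whole 4-tuple is treated as x:

train_ds = tf.data.Dataset.zip(
    (c1.repeat(), c2.repeat(), c3.repeat(), c4.repeat())
).map(lambda a, b, c, d: ((a, b, c, d),))

# inside train_step, unpack the single-element tuple first, e.g. data = data[0]
vae.fit(train_ds, epochs=10, steps_per_epoch=10)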

Siamese network with third component error

I was able to create a siamese network similar to:
https://github.com/aspamers/siamese/
The problem happens if I try to add a third model as an input to the head of my network.
I will get the following error :
ValueError: Shape must be rank 2 but is rank 3 for '{{node head/concatenate/concat}} = ConcatV2[N=3, T=DT_FLOAT, Tidx=DT_INT32](simple/Identity, simple_1/Identity, different/Identity, head/concatenate/concat/axis)' with input shapes: [?,?], [?,?], [?,?,1], [].
Here is the code below. One thing I am not comfortable with is the line processed_a = base_model1(input_a) and what it does, even after checking the Keras Model docs. I understand that if I don't do it I cannot get the desired shape and provide the necessary inputs to the final network.
Note that if I replace the code with what is commented out and just use a pure siamese network, it works fine.
Any idea what needs to be changed to resolve the above error, and what base_model1(input_a) does?
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.python.keras.layers import *

def getModel1(input_shape):
    model_input = Input(shape=input_shape)
    layer = layers.Dense(32, activation='relu')(model_input)
    layer = layers.Flatten()(layer)
    return tf.keras.Model(inputs=model_input, outputs=layer, name="simple")

def getModel3(input_shape):
    model_input = Input(shape=input_shape)
    layer = layers.Dense(1, activation='relu')(model_input)
    return tf.keras.Model(inputs=model_input, outputs=layer, name="different")

def outputModel(models):
    inputs = []
    for model in models:
        inputs.append(Input(shape=model.output_shape))
    layer = layers.Concatenate()(inputs)
    layer = layers.Dense(1)(layer)
    return tf.keras.Model(inputs=inputs, outputs=layer, name="head")

dataset = []
inputs1 = []
for i in range(0, 128):
    dataset.append([0.0, 1.0, 2.0])
train_dataset1 = np.asarray(dataset)
base_model1 = getModel1(train_dataset1.shape)

dataset3 = [0.0, 1.0, 2.0]
train_dataset3 = np.asarray(dataset3)
base_model3 = getModel3(train_dataset3.shape)

input_a = Input(shape=base_model1.input_shape)
input_b = Input(shape=base_model1.input_shape)
input_c = Input(shape=base_model3.input_shape)

oModel = outputModel([base_model1, base_model1, base_model3])
#oModel = outputModel([base_model1, base_model1])

processed_a = base_model1(input_a)
processed_b = base_model1(input_b)
processed_c = base_model3(input_c)

head = oModel([processed_a, processed_b, processed_c])
model = tf.keras.Model(inputs=[input_a, input_b, input_c], outputs=head, name="model")
#head = oModel([processed_a, processed_b])
#model = tf.keras.Model(inputs=[input_a, input_b], outputs=head, name="model")

optimizer = tf.keras.optimizers.RMSprop(0.001)
model.compile(loss='mse',
              optimizer=optimizer,
              metrics=['mae', 'mse'])

model.predict([np.asarray([train_dataset1]), np.asarray([train_dataset1]), np.asarray([train_dataset3])])
#model.predict([np.asarray([train_dataset1]), np.asarray([train_dataset1])])
model.fit([np.asarray([train_dataset1]), np.asarray([train_dataset1]), np.asarray([train_dataset3])], np.asarray([1.0]), epochs=1000, validation_split=0, verbose=0, callbacks=[])
#model.fit([np.asarray([train_dataset1]), np.asarray([train_dataset1])], np.asarray([1.0]), epochs=1000, validation_split=0, verbose=0, callbacks=[])
Pay attention to the dimensionality you define when you initialize the model inputs and outputs. The first dimension is always the batch size (None), and this can cause you problems. Here is a corrected example:
def getModel1(input_shape):
    model_input = Input(shape=input_shape)
    layer = Dense(32, activation='relu')(model_input)
    layer = Flatten()(layer)
    return Model(inputs=model_input, outputs=layer, name="simple")

def getModel3(input_shape):
    model_input = Input(shape=input_shape)
    layer = Dense(1, activation='relu')(model_input)
    return Model(inputs=model_input, outputs=layer, name="different")

def outputModel(models):
    inputs = []
    for model in models:
        inputs.append(Input(shape=model.output_shape[1:]))
    layer = Concatenate()(inputs)
    layer = Dense(1)(layer)
    return Model(inputs=inputs, outputs=layer, name="head")

base_model1 = getModel1((128, 3))
base_model3 = getModel3((3,))

input_a = Input(shape=base_model1.input_shape[1:])
input_b = Input(shape=base_model1.input_shape[1:])
input_c = Input(shape=base_model3.input_shape[1:])

oModel = outputModel([base_model1, base_model1, base_model3])

processed_a = base_model1(input_a)
processed_b = base_model1(input_b)
processed_c = base_model3(input_c)

head = oModel([processed_a, processed_b, processed_c])
model = Model(inputs=[input_a, input_b, input_c], outputs=head, name="model")

optimizer = tf.keras.optimizers.RMSprop(0.001)
model.compile(loss='mse',
              optimizer=optimizer,
              metrics=['mae', 'mse'])

# create dummy data
n_sample = 5
train_dataset1 = np.random.uniform(0, 1, (n_sample, 128, 3))
train_dataset3 = np.random.uniform(0, 1, (n_sample, 3))
y = np.random.uniform(0, 1, n_sample)

model.fit([train_dataset1, train_dataset1, train_dataset3], y, epochs=3)
model.predict([train_dataset1, train_dataset1, train_dataset3]).shape
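Regarding the other part of the question, what base_model1(input_a) does: a Keras Model is callable exactly like a layer, so applying it to a symbolic tensor grafts the whole sub-model onto that tensor while reusing (sharing) its weights, which is what a siamese setup needs:

processed_a = base_model1(input_a)  # applies base_model1 to input_a
processed_b = base_model1(input_b)  # the same shared weights applied to the second input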

"Could not compute output" error using tf.keras merge layers in Tensorflow 2

I'm trying to use a merge layer in tf.keras but getting AssertionError: Could not compute output Tensor("concatenate_3/Identity:0", shape=(None, 10, 8), dtype=float32). Minimal (not)working example:
import tensorflow as tf
import numpy as np
context_length = 10
input_a = tf.keras.layers.Input((context_length, 4))
input_b = tf.keras.layers.Input((context_length, 4))
#output = tf.keras.layers.concatenate([input_a, input_b]) # same error
output = tf.keras.layers.Concatenate()([input_a, input_b])
model = tf.keras.Model(inputs = (input_a, input_b), outputs = output)
a = np.random.rand(3, context_length, 4).astype(np.float32)
b = np.random.rand(3, context_length, 4).astype(np.float32)
pred = model(a, b)
I get the same error with other merge layers (e.g. add). I'm on TF2.0.0-alpha0 but get the same with 2.0.0-beta1 on colab.
OK, the error message was not helpful, but I eventually stumbled upon the solution: the input to the model needs to be a single iterable of tensors, i.e.
pred = model((a, b))
works just fine.
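Passing the two arrays as a single list works the same way:

pred = model([a, b])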
It fails because of the tf.keras.layers.Input layers: TensorFlow can't validate the shape of the inputs, so it fails. This will work:
class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.concat = tf.keras.layers.Concatenate()
        # You can also add the other layers
        self.dense_1 = tf.keras.layers.Dense(10)

    def call(self, a, b):
        out_concat = self.concat([a, b])
        out_dense = self.dense_1(out_concat)
        return out_dense

model = MyModel()
a = np.random.rand(3, 5, 4).astype(np.float32)
b = np.random.rand(3, 5, 4).astype(np.float32)
output = model(a, b)

Using tf.keras.Model as base class for defining RNN Cell

I'm working with TensorFlow's eager execution to develop a variation of a Variational Autoencoder (VAE) in a sequential data setting. Since both the recurrent network structure and its input-output flow are not standard, I have to build my own custom RNNCell, which can later be passed to the tf.nn.raw_rnn API.
To build the desired RNNCell class, I use tf.keras.Model as the base class. But when I pass this RNNCell to tf.nn.raw_rnn, I get nan outputs. What's wrong?
Here is my implementation (please tell me if anything is unclear).
import tensorflow as tf
tfe = tf.contrib.eager
tf.enable_eager_execution()
import numpy as np
from tensorflow.keras.layers import Input, Dense, Lambda
from tensorflow.keras.models import Model
The dataset is called inputs, with all bounded entries of float32 dtype and shape (time_steps, batch_size, input_depth) = (20, 1000, 4). Notice the difference in shape layout compared to the more familiar tf.nn.dynamic_rnn API, where the expected format is (batch_size, time_steps, input_depth).
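If your data starts out in the batch-major layout that tf.nn.dynamic_rnn uses, a single transpose converts it to the time-major layout assumed here (batch_major_inputs below is a hypothetical name):

# (batch_size, time_steps, input_depth) -> (time_steps, batch_size, input_depth)
inputs = tf.transpose(batch_major_inputs, perm=[1, 0, 2])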
# defining the sampling and reparameterizing function
def sampling(args):
    mean, logvar = args
    batch = batch_size
    dim = latent_dim
    # by default, random_normal has mean = 0 and std = 1.0
    epsilon = tf.random_normal(shape=(batch, dim))
    return mean + tf.exp(0.5 * logvar) * epsilon
# defining the model class (PreSSM = without transition module yet)
class PreSSM(tf.keras.Model):
    def __init__(self, latent_dim=4, intermediate_dim=4):
        super(PreSSM, self).__init__()
        self.latent_dim = latent_dim
        self.input_dim = self.latent_dim + 4  # toy problem

        inputs = Input(shape=(self.latent_dim + 4,), name='inference_input')
        layer_1 = Dense(intermediate_dim, activation='relu')(inputs)
        layer_2 = Dense(intermediate_dim, activation='relu')(layer_1)
        mean = Dense(latent_dim, name='mean')(layer_2)
        logvar = Dense(latent_dim, name='logvar')(layer_2)
        s = Lambda(sampling, output_shape=(latent_dim,), name='s')([mean, logvar])
        self.inference_net = Model(inputs, [mean, logvar, s], name='inference_net')

        latent_inputs = Input(shape=(latent_dim,), name='s_sampling')
        layer_3 = Dense(intermediate_dim, activation='relu')(latent_inputs)
        layer_4 = Dense(intermediate_dim, activation='relu')(layer_3)
        outputs = Dense(2)(layer_4)
        self.generative_net = Model(latent_inputs, outputs, name='generative_net')

    @property
    def state_size(self):
        return latent_dim

    @property
    def output_size(self):
        return 2  # (x, y) coordinate

    @property
    def zero_state(self):
        return init_state  # global variable we have defined

    def __call__(self, inputs, state):
        next_state = self.inference_net(inputs)[-1]
        output = self.generative_net(next_state)
        return output, next_state
# instantiate the cell == model instance
model = PreSSM()

# define a class whose instance provides the loop_fn method
class SuperLoop:
    def __init__(self, inputs, output_dim=2):
        inputs_ta = tf.TensorArray(dtype=tf.float32, size=max_time, clear_after_read=False)
        inputs_ta = inputs_ta.unstack(inputs)  # the data
        self.inputs_ta = inputs_ta
        self.output_dim = output_dim

    def loop_fn(self, time, cell_output, cell_state, loop_state):
        emit_output = cell_output  # == None for time == 0
        if cell_output is None:  # when time == 0
            next_cell_state = init_state
            emit_output = tf.zeros([self.output_dim])
        else:
            emit_output = cell_output
            next_cell_state = cell_state
        elements_finished = (time >= seq_length)
        finished = tf.reduce_all(elements_finished)
        if finished:
            next_input = tf.zeros(shape=(self.output_dim), dtype=tf.float32)
        else:
            next_input = tf.concat([self.inputs_ta.read(time), next_cell_state], -1)
        next_loop_state = None
        return (elements_finished, next_input, next_cell_state, emit_output, next_loop_state)

# defining the model
def SSM_model(inputs, RNN_cell=model, output_dim=2):
    superloop = SuperLoop(inputs, output_dim)
    outputs_ta, final_state, final_loop_state = tf.nn.raw_rnn(RNN_cell, superloop.loop_fn)
    outputs = outputs_ta.stack()
    return outputs

# model check
SSM_model(inputs=inputs, RNN_cell=model)
Here, the outputs are nan...
Hence I can't proceed to the training step. What's wrong? Am I missing something when defining the RNNCell using tf.keras.Model as the base class above?
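Without the full values it is hard to say where the nan first appears, but since eager execution is enabled you can probe the two sub-networks directly on a single time step to localize it. A debugging sketch (a diagnostic, not a fix):

# probe the inference and generative nets with the concrete first time step
probe = tf.concat([inputs[0], init_state], axis=-1)  # shape (batch_size, latent_dim + 4)
mean, logvar, s = model.inference_net(probe)
tf.debugging.check_numerics(mean, "mean contains nan/inf")
tf.debugging.check_numerics(logvar, "logvar contains nan/inf")
tf.debugging.check_numerics(model.generative_net(s), "generative output contains nan/inf")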