Undefined output shape of custom Keras layer - tensorflow

I am writing a custom Keras layer that flattens all except the last dimension of the input. However, when feeding the output of the layer into the next layer an error occurs because the output shape of the layer is None in all dimensions.
class FlattenLayers(Layer):
    """Collapse the middle axes of an n-D tensor, keeping the first and last.

    [ n, x, y, z ] -> [ n, xy, z ]
    """

    def __init__(self, **kwargs):
        super(FlattenLayers, self).__init__(**kwargs)

    def build(self, input_shape):
        super(FlattenLayers, self).build(input_shape)

    def call(self, inputs):
        """Reshape ``inputs`` using its run-time shape tensor."""
        # Every entry of the target shape comes from tf.shape(), i.e. it is
        # only known when the graph runs.
        runtime_shape = tf.shape(inputs)
        middle = K.prod(runtime_shape[1:-1])
        last = runtime_shape[-1]
        target = tf.stack([-1, middle, last])
        return tf.reshape(inputs, target)

    def compute_output_shape(self, input_shape):
        """Return ``(batch, product of middle dims, last dim)``.

        Raises:
            ValueError: if any non-batch dimension of ``input_shape`` is falsy
                (for example ``None``).
        """
        trailing = input_shape[1:]
        if all(trailing):
            return (
                input_shape[0],
                np.prod(input_shape[1:-1]),
                input_shape[-1],
            )
        raise ValueError('The shape of the input to "Flatten" '
                         'is not fully defined '
                         '(got ' + str(trailing) + '). '
                         'Make sure to pass a complete "input_shape" '
                         'or "batch_input_shape" argument to the first '
                         'layer in your model.')
For example, when a Dense layer follows I receive the error ValueError: The last dimension of the inputs to Dense should be defined. Found None.

Why do you have tf.stack() in new shape? You want to flatten all dimensions except the last one; this is how you could do it:
import tensorflow as tf
from tensorflow.keras.layers import Layer
import numpy as np
class FlattenLayer(Layer):
    """Flatten every axis except the last one: ``[a, b, ..., z] -> [a*b*..., z]``.

    Works for inputs of any rank >= 2. The last dimension is taken from the
    static shape when it is known, so layers that need a defined last
    dimension can follow this one.
    """

    def __init__(self, **kwargs):
        super(FlattenLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        super(FlattenLayer, self).build(input_shape)

    def call(self, inputs):
        """Reshape ``inputs`` to 2-D, preserving the last axis."""
        last_dim = inputs.shape[-1]
        # TF1 exposes dimensions as Dimension objects (with .value);
        # TF2 exposes plain ints or None.
        last_dim = getattr(last_dim, "value", last_dim)
        if last_dim is None:
            # Static size unknown: fall back to the run-time size.
            last_dim = tf.shape(inputs)[-1]
        # -1 lets TensorFlow infer the merged leading size, so no dimension
        # (including an unknown batch size) has to be multiplied by hand.
        return tf.reshape(inputs, (-1, last_dim))

    def compute_output_shape(self, input_shape):
        """Return the static output shape ``(rows, last_dim)``.

        ``rows`` is the product of all leading dimensions, or ``None`` when
        any of them is unknown (for example the batch size).
        """
        dims = tf.TensorShape(input_shape).as_list()
        leading = dims[:-1]
        if any(d is None for d in leading):
            rows = None
        else:
            rows = int(np.prod(leading))
        return (rows, dims[-1])
Testing with a single data point (tf.__version__=='1.13.1'):
# Build a tiny Conv2D -> FlattenLayer model and evaluate it on one sample.
inputs = tf.keras.layers.Input(shape=(10, 10, 1))
conv_out = tf.keras.layers.Conv2D(filters=3, kernel_size=2)(inputs)
res = FlattenLayer()(conv_out)
model = tf.keras.models.Model(inputs=inputs, outputs=res)

x_data = np.random.normal(size=(1, 10, 10, 1))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {model.inputs[0]: x_data}
    evaled = model.outputs[0].eval(feed)
    print(evaled.shape)  # (81, 3)

Related

Gradient = nan when using DenseVariational layer

I work with binary data (named mimic) and I want to build a Bayesian model that reproduces this data. To do so, I define this model:
def prior(kernel_size, bias_size, dtype=None):
    """Build the prior model: a diagonal Gaussian with zero mean and unit scale.

    The distribution has ``kernel_size + bias_size`` dimensions.
    ``dtype`` is accepted but not used.
    """
    n = kernel_size + bias_size

    def fixed_normal(t):
        # The layer input ``t`` is ignored; the prior has no parameters.
        return tfp.distributions.MultivariateNormalDiag(
            loc=tf.zeros(n), scale_diag=tf.ones(n)
        )

    prior_model = tf.keras.Sequential(
        [tfp.layers.DistributionLambda(fixed_normal)]
    )
    return prior_model
def posterior(kernel_size, bias_size, dtype=None):
    """Build the posterior model: a full-covariance Gaussian with variable parameters.

    A ``VariableLayer`` holds the parameters and a ``MultivariateNormalTriL``
    layer turns them into a distribution over ``kernel_size + bias_size``
    dimensions.
    """
    n = kernel_size + bias_size
    params_size = tfp.layers.MultivariateNormalTriL.params_size(n)
    layers = [
        tfp.layers.VariableLayer(params_size, dtype=dtype),
        tfp.layers.MultivariateNormalTriL(n),
    ]
    return tf.keras.Sequential(layers)
# Model: latent samples -> DenseVariational (sigmoid) -> Bernoulli output.
model = tf.keras.Sequential(
    [
        tfkl.Input(shape=(), name='dummy_input'),
        # Ignores its input and emits `latentNormal`; converting it to a
        # tensor draws `batchSize` samples.
        tfpl.DistributionLambda(
            lambda t: latentNormal,
            convert_to_tensor_fn=lambda x: x.sample(batchSize),
        ),
        tfp.layers.DenseVariational(
            units=inputDim,
            make_prior_fn=prior,
            make_posterior_fn=posterior,
            activation="sigmoid",
            use_bias=False,
        ),
        # The sigmoid outputs are used as Bernoulli probabilities.
        tfpl.DistributionLambda(lambda t: tfd.Bernoulli(probs=t)),
    ]
)
Then I train the model:
# Custom training loop for the model above.
# NOTE(review): indentation was lost in this paste; the nesting of the loop
# bodies must be restored before this can run.
# Loss: negative log-probability of `data` under the model's output distribution.
# NOTE(review): 69 looks like an arbitrary value for the 'dummy_input' — confirm.
negloglik = lambda data: -model(69).log_prob(data)
optimizer = tf.keras.optimizers.Adam()
# Accumulators: `loo` receives loss values, `kls` receives KL values.
loo=[]
kls = []
for epoch in trange(100):
# print(epoch)
# model.fit(mimic[:1453*32], mimic[:1453*32], epochs=1, batch_size=batchSize, verbose=0)
# Draw 3*batchSize distinct row indices (enough for the three batches below).
idx = np.random.choice(np.arange(mimic.shape[0]), size=3*batchSize, replace=False)
shuffled_ds = mimic.numpy()[idx]
for nBatch in range(3):
# print(nBatch)
# Slice out the nBatch-th chunk of batchSize rows.
batch = shuffled_ds[nBatch*batchSize:(1+nBatch)*batchSize]
with tf.GradientTape() as tape:
tape.watch(model.trainable_variables)
loss = negloglik(batch)
loo.append(loss)
# Differentiate the loss w.r.t. the trainable variables and apply one Adam step.
grads = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(grads, model.trainable_variables))
# Mean KL divergence between the model output and `real_dist`.
# NOTE(review): unclear whether this runs once per batch or once per epoch — confirm.
kl = tf.reduce_mean(tfd.kl_divergence(model(0), real_dist))
kls.append(kl.numpy())
More precisely, when I run for one epoch and one batch, my model's weights are full of NaN (screenshot omitted), and the gradient is also NaN (screenshot omitted).
Do you have any idea how I can solve this?
I tried replacing the DenseVariational layer with a Dense layer and everything works well. I don't understand why DenseVariational is a problem here.

Stateful RNN (LSTM) in keras

Imagine the following data:
X = [x1, x2, x3, x4, x5, x6, ...]
and
Y = [y1, y2, y3, y4, ...]
the label represent the input in the following manner:
[x1,x2] -> y1
[x2,x3] -> y2
.
.
.
I am trying to build a model using Keras so that, when classification takes place, the model remembers what it predicted at the previous stage. I want it to be causal, in that the next prediction depends directly on the previous one, somewhat like methods such as HMMs. So something like this:
Y2 = f( [x2,x3] , y1)
I have read this page, where they divide each batch into sub-batches (if that's the correct term?) and reset state between each main batch, but what I want to do is not shuffle the batches and introduce that causality into the model.
My question is how can you do this with stateful LSTMs?
One way is to write a custom layer that inherits from the LSTM class.
[ Sample ]:
import tensorflow as tf
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Class / Definition
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
class MyLSTMLayer(tf.keras.layers.LSTM):
    """LSTM subclass whose ``build`` and ``call`` are replaced by a single matmul.

    Because both methods are overridden, the layer only multiplies its input
    by one ``kernel`` weight.
    """

    def __init__(self, units, return_sequences, return_state):
        # Forward the caller's flags instead of silently replacing them with
        # hard-coded True/False.
        super(MyLSTMLayer, self).__init__(
            units,
            return_sequences=return_sequences,
            return_state=return_state,
        )
        self.num_units = units

    def build(self, input_shape):
        """Create the ``kernel`` weight of shape ``[input_dim, units]``."""
        self.kernel = self.add_weight(
            "kernel",
            shape=[int(input_shape[-1]), self.num_units],
        )

    def call(self, inputs):
        """Return ``inputs @ kernel``."""
        return tf.matmul(inputs, self.kernel)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# Build a (3, 1, 1) float sample [3, 6, 9] and run it through the layer.
start, limit, delta = 3, 12, 3
sample = tf.cast(tf.range(start, limit, delta), dtype=tf.float32)
sample = tf.constant(sample, shape=(sample.shape[0], 1, 1))
layer = MyLSTMLayer(sample.shape[0], True, False)

print(sample)
print(layer(sample))
[ Output ]:
tf.Tensor(
[[[3.]]
[[6.]]
[[9.]]], shape=(3, 1, 1), dtype=float32)
tf.Tensor(
[[[-1.8635211 2.6157026 -1.6650987]]
[[-3.7270422 5.2314053 -3.3301973]]
[[-5.5905633 7.8471084 -4.995296 ]]], shape=(3, 1, 3), dtype=float32)

tf.reshape(self.normalized_price(prce), (-1, 1)), ValueError: Shape must be rank 1 but is rank 2

I am getting the following error when I am calling the subclass of the model. My guess is that I am not passing the two parameters correctly or the reshape is not outputting the correct value.
ValueError: Shape must be rank 1 but is rank 2 for '{{node base_stock_model/concat}} = ConcatV2[N=3, T=DT_FLOAT, Tidx=DT_INT32](base_stock_model/sequential_2/embedding_2/embedding_lookup/Identity_1, base_stock_model/sequential_3/embedding_3/embedding_lookup/Identity_1, base_stock_model/Reshape, base_stock_model/concat/axis)' with input shapes: [32], [32], [1,1], [].
Here is the main class model
class StockModel(tfrs.models.Model):
    """Multitask recommender combining a rating task and a retrieval task."""

    def __init__(self, rating_weight: float, retrieval_weight: float) -> None:
        super().__init__()
        embedding_dimension = 32

        # Embedding towers for users and stocks.
        self.user_model = UserModel()
        self.stock_model = base_stockModel()

        # Rating head: two ReLU layers followed by one scalar output.
        hidden_layers = [
            tf.keras.layers.Dense(units, activation="relu")
            for units in (256, 128)
        ]
        self.rating_model = tf.keras.Sequential(
            hidden_layers + [tf.keras.layers.Dense(1)]
        )

        # The tasks.
        self.rating_task: tf.keras.layers.Layer = tfrs.tasks.Ranking(
            loss=tf.keras.losses.MeanSquaredError(),
            metrics=[tf.keras.metrics.RootMeanSquaredError()],
        )
        candidate_embeddings = stocks.batch(1).map(self.stock_model)
        self.retrieval_task: tf.keras.layers.Layer = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(candidates=candidate_embeddings)
        )

        # The loss weights.
        self.rating_weight = rating_weight
        self.retrieval_weight = retrieval_weight

    def call(self, features: Dict[Text, tf.Tensor]) -> tf.Tensor:
        """Return ``(user_embeddings, stock_embeddings, rating_predictions)``."""
        user_embeddings = self.user_model(features['username'])
        # The price is converted to a string before being handed to the
        # stock model.
        price = tf.as_string(features["price"])
        stock_embeddings = self.stock_model([features["name"], price])
        joint = tf.concat([user_embeddings, stock_embeddings], axis=1)
        rating_predictions = self.rating_model(joint)
        return (user_embeddings, stock_embeddings, rating_predictions)

    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        """Return the weighted sum of the rating and retrieval losses.

        Note: removes the "Rating" entry from ``features``.
        """
        ratings = features.pop("Rating")
        print("features", features)
        user_embeddings, stock_embeddings, rating_predictions = self(features)

        # We compute the loss for each task.
        rating_loss = self.rating_task(
            labels=ratings,
            predictions=rating_predictions,
        )
        retrieval_loss = self.retrieval_task(user_embeddings, stock_embeddings)

        # And combine them using the loss weights.
        weighted_rating = self.rating_weight * rating_loss
        weighted_retrieval = self.retrieval_weight * retrieval_loss
        return weighted_rating + weighted_retrieval
The above main class model calls the base_stockModel class, which is causing errors.
class base_stockModel(tf.keras.Model):
    """Stock tower: name embedding, bucketized price embedding and normalized price."""

    def __init__(self):
        super().__init__()
        embedding_dimension = 32

        # Stock name -> integer id -> embedding.
        name_lookup = tf.keras.layers.StringLookup(
            vocabulary=unique_stock_titles, mask_token=None)
        name_embedding = tf.keras.layers.Embedding(
            len(unique_stock_titles) + 1, embedding_dimension)
        self.stock_embedding = tf.keras.Sequential([name_lookup, name_embedding])

        # Price -> bucket index -> embedding.
        self.price_embedding = tf.keras.Sequential([
            tf.keras.layers.Discretization(prices_bucket.tolist()),
            tf.keras.layers.Embedding(len(prices_bucket) + 2, 32),
        ])

        # Normalization statistics are adapted from `prices`.
        self.normalized_price = tf.keras.layers.Normalization(axis=None)
        self.normalized_price.adapt(prices)

    def call(self, input, *args, **kwargs):
        """Concatenate the three price/name features along axis 1.

        ``input[0]`` is the stock name and ``input[1]`` is the price as a
        string.
        """
        print(input.get_shape(), kwargs)
        nme = input[0]
        # Prices arrive as strings and are parsed back to float32.
        prce = tf.strings.to_number(input[1], out_type=tf.dtypes.float32)
        parts = [
            self.stock_embedding(nme),
            self.price_embedding(prce),
            tf.reshape(self.normalized_price(prce), (-1, 1)),
        ]
        return tf.concat(parts, axis=1)
This code is a variant of tensorflow recommender official page https://www.tensorflow.org/recommenders/examples/multitask/
https://www.tensorflow.org/recommenders/examples/context_features
Any help is much appreciated.
First, I analyzed the ranks of all the input tensors. If they do not have the rank the model expects, you have to use tf.reshape() or adjust the input to match what the model requires. Note that tf.shape() gives you the shape at run time, while the model is running.
Here is documentation on it.
https://www.tensorflow.org/api_docs/python/tf/reshape
https://www.tensorflow.org/api_docs/python/tf/shape

Why is Tensorflow "save_model" failing?

I'm working in Tensorflow 2.0.0, and trying to save a model. Here's the code I'm using (thanks to #m-innat for suggesting to simplify the
example model)
class SimpleModel(tf.keras.Model):
    """Minimal model: a Conv1D layer followed by a Dense layer."""

    def __init__(self, **kwargs):
        super(SimpleModel, self).__init__(**kwargs)
        self.conv = tf.keras.layers.Conv1D(filters=5, kernel_size=3, padding="SAME")
        self.dense = tf.keras.layers.Dense(1)

    def call(self, x):
        """Apply the convolution, then the dense layer."""
        conv_out = self.conv(x)
        return self.dense(conv_out)
# Instantiate the model, call it once on random data, then save it.
simple_model = SimpleModel()
input_shape = (3, 4, 5)
x = tf.random.normal(shape=input_shape)
y = tf.random.normal(shape=(3, 4, 1))
y_pred = simple_model(x)
print("y_pred", y_pred)
# Save the model defined above; the earlier name `translation_model` was a
# leftover from the un-simplified example and is not defined here.
tf.keras.models.save_model(simple_model,
                           "/content/gdrive/MyDrive/SimpleModel.tf", save_format="tf")
However, the save_model call gives an error:
AttributeError: 'NoneType' object has no attribute 'shape'
Nothing in the call stack suggests what the underlying problem is. Can you please help?
The error is related to the fact that the input shapes of the layers are not set. This can be done by calling simple_model.fit or simple_model.predict once.
For example, in your code, you can call y_pred = simple_model.predict( x ).
In this way, the model is correctly saved as I checked in the code below.
import tensorflow as tf
class SimpleModel(tf.keras.Model):
    """Conv1D (5 filters, kernel size 3, SAME padding) followed by Dense(1)."""

    def __init__(self, **kwargs):
        super(SimpleModel, self).__init__(**kwargs)
        self.conv = tf.keras.layers.Conv1D(filters=5, kernel_size=3, padding="SAME")
        self.dense = tf.keras.layers.Dense(1)

    def call(self, x):
        """Run ``x`` through the convolution and then the dense layer."""
        features = self.conv(x)
        output = self.dense(features)
        return output
# Run predict() once before saving, then save and reload the model.
simple_model = SimpleModel()
input_shape = (3, 4, 5)
x = tf.random.normal(shape=input_shape)
y = tf.random.normal(shape=(3, 4, 1))

y_pred = simple_model.predict(x)
print("y_pred", y_pred)

tf.keras.models.save_model(
    simple_model,
    "/content/gdrive/MyDrive/SimpleModel.tf",
    save_format="tf",
)
# Output:
# y_pred [[[-0.4533468 ]
# [ 1.3261242 ]
# [-1.0296338 ]
# [-1.1136482 ]] ...

# Reload from disk and predict on the same input.
model = tf.keras.models.load_model('/content/gdrive/MyDrive/SimpleModel.tf')
model.predict(x)
# Output:
#array([[[-0.4533468 ],
# [ 1.3261242 ],
# [-1.0296338 ],
# [-1.1136482 ]], ...

Using TF Estimator with TFRecord generator

I am trying to create a simple NN that reads in a folder of tfrecords. Each record has a 1024-value 'mean_rgb' vector, and a category label. I am trying to create a simple feed-forward NN that learns the categories based on this feature vector.
def generate(dir, shuffle, batch_size):
    """Return the next ``(features, label)`` batch read from TFRecord files.

    Each record is parsed into a 1024-float 'mean_rgb' vector and an int64
    'category' label. The dataset is optionally shuffled (buffer of 8000),
    then batched, and the first element of a one-shot iterator is returned.
    """

    def parse(serialized):
        spec = {
            'mean_rgb': tf.FixedLenFeature([1024], tf.float32),
            'category': tf.FixedLenFeature([], tf.int64)
        }
        example = tf.parse_single_example(serialized=serialized, features=spec)
        # Features are returned as a dict keyed by column name, plus the label.
        return {'mean_rgb': example['mean_rgb']}, example['category']

    dataset = tf.data.TFRecordDataset(dir).repeat(1).map(parse)
    if shuffle:
        dataset = dataset.shuffle(8000)
    dataset = dataset.batch(batch_size)

    next_element = dataset.make_one_shot_iterator().get_next()
    print(next_element)
    return next_element
def batch_generator(dir, shuffle=False, batch_size=64):
    """Yield evaluated batches from ``generate`` indefinitely.

    The dataset and iterator are built once per pass over the data rather
    than once per batch. Rebuilding them for every batch adds new ops to the
    graph on every step and, without shuffling, yields the first batch
    forever. When a pass is exhausted a new one is started.

    NOTE(review): assumes graph mode, where ``sess.run`` on the tensors
    returned by ``generate`` is valid — confirm.
    """
    sess = K.get_session()
    while True:
        next_batch = generate(dir, shuffle, batch_size)
        produced = False
        try:
            while True:
                batch = sess.run(next_batch)
                produced = True
                yield batch
        except tf.errors.OutOfRangeError:
            # End of this pass. Stop if the dataset is empty, so the loop
            # does not spin forever.
            if not produced:
                return
# Collect the TFRecord files, build a DNN classifier and train it.
num_classes = 29
batch_size = 64

yt8m_train = [
    os.path.join(yt8m_dir_train, x)
    for x in read_all_file_names(yt8m_dir_train)
    if '.tfrecord' in x
]
yt8m_test = [
    os.path.join(yt8m_dir_test, x)
    for x in read_all_file_names(yt8m_dir_test)
    if '.tfrecord' in x
]

feature_columns = [tf.feature_column.numeric_column(k) for k in ['mean_rgb']]
#batch_generator(yt8m_test).__next__()

classifier = tf.estimator.DNNClassifier(
    feature_columns=feature_columns,
    hidden_units=[1024, 1024],
    n_classes=num_classes,
    model_dir=model_dir,
)
classifier.train(input_fn=lambda: generate(yt8m_train, True, batch_size))
However, I get the following error:
InvalidArgumentError (see above for traceback): Input to reshape is a
tensor with 65536 values, but the requested shape has 64
I am not sure why it sees the input as a 64x1024=65536 vector instead of a (64, 1024) vector. When I print the next item in the generator, I get
({'mean_rgb': <tf.Tensor: id=23, shape=(64, 1024), dtype=float32, numpy=
array([[ 0.9243997 , 0.28990048, -0.4130672 , ..., -0.096692 ,
0.27225342, 0.13346168],
[ 0.5853526 , 0.67050666, -0.24683481, ..., -0.6999033 ,
-0.4100128 , -0.00349384],
[ 0.49572858, 0.5231492 , -0.53445834, ..., 0.0449002 ,
0.10582132, -0.37333965],
...,
[ 0.5776026 , -0.07128889, -0.61762846, ..., 0.22194198,
0.61441416, -0.27355513],
[-0.01848815, 0.20132884, 1.1023484 , ..., 0.06496283,
0.29560333, 0.09157721],
[-0.25877073, -1.9552246 , 0.10309827, ..., 0.22032814,
-0.6812989 , -0.23649289]], dtype=float32)>}
which has the correct (64, 1024) shape
The problem lies in how feature columns work. I had a similar problem and solved it with a reshape; here is part of my code that should help you understand:
defining the features_column:
feature_columns = {
'images': tf.feature_column.numeric_column('images', self.shape),
}
then to create the input for the model:
with tf.name_scope('input'):
feature_columns = list(self._features_columns().values())
input_layer = tf.feature_column.input_layer(
features=features, feature_columns=feature_columns)
input_layer = tf.reshape(
input_layer,
shape=(-1, self.parameters.size, self.parameters.size,
self.parameters.channels))
Note the last part: I had to reshape the tensor, and the -1 lets TensorFlow infer the batch size.
I believe the issue was that feature_columns = [tf.feature_column.numeric_column(k) for k in ['mean_rgb']] assumes that the column is a scalar - when actually it is a 1024 vector. I had to add shape=1024 to the numeric_column call. Also had to remove existing checkpoint saved model.