Keras Dense layer gets input_shape wrong - tensorflow

I wrote the below custom layer and when I try to add a Dense layer afterwards it gets the input_shape wrong and expects the shape[-1] dimension of the tensor before the layer.
from keras import backend as K
from keras.engine.topology import Layer
from keras.layers import Conv2D, Dense, Input
class SMSO(Layer):
    """Custom layer that reduces a 4D feature map to ``feature_dim`` values.

    The input is mean-centred over axes 1 and 2, passed through a 1x1
    convolution with ``feature_dim`` filters, squared, summed over axes 1
    and 2, square-rooted, and finally scaled and shifted by two learned
    weights of shape ``(1, feature_dim)``.
    """

    def __init__(self, feature_dim=256, **kwargs):
        """Store ``feature_dim`` and forward remaining kwargs to ``Layer``."""
        self.feature_dim = feature_dim
        super(SMSO, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the ``scale`` (ones) and ``offset`` (zeros) weights."""
        param_shape = (1, self.feature_dim)
        self.scale = self.add_weight(
            'scale', shape=param_shape, initializer='ones', trainable=True)
        self.offset = self.add_weight(
            'offset', shape=param_shape, initializer='zeros', trainable=True)
        super(SMSO, self).build(input_shape)

    def call(self, x):
        """Apply the centring, 1x1 convolution, and norm-like reduction."""
        centred = x - K.mean(x, axis=(1, 2), keepdims=True)
        # NOTE(review): a fresh Conv2D layer is instantiated on every call,
        # so it is not a weight created in build() -- confirm this is intended.
        projected = Conv2D(self.feature_dim, 1)(centred)
        magnitude = K.sqrt(K.sum(K.square(projected), axis=(1, 2)))
        # NOTE(review): this class defines no compute_output_shape even though
        # the reduction above drops axes 1 and 2.
        return self.scale * magnitude + self.offset
# Reproduction: feed a symbolic 10x10x32 input through SMSO(16), print the
# resulting shape, then stack a Dense(10) layer on top of it.
x = Input(shape=(10, 10, 32))
l1 = SMSO(16)(x)
print(l1.shape)
l2 = Dense(10)(l1)
Here is the code to reproduce the error. l1.shape gives (?, 16) as expected but the next line fails.

Adding a compute_output_shape function solves the problem.
def compute_output_shape(self, input_shape):
    """Return ``(input_shape[0], self.feature_dim)`` as the output shape."""
    leading_dim = input_shape[0]
    return (leading_dim, self.feature_dim)
Any layer that modifies shape needs to have a compute_output_shape.

Related

How to write a custom call function for a Tensorflow LSTM class?

I have defined a custom LSTM Layer as follows:
class LSTMModel(tf.keras.Model):
    """Model skeleton holding a CNN, an LSTM and a softmax Dense head.

    Only the sub-layers are created; the forward pass is still a stub.
    """

    def __init__(self, CNN_model, num_classes):
        """Keep ``CNN_model`` and build the LSTM (64 units) and Dense layers."""
        super().__init__()
        keras_layers = tf.keras.layers
        self.cnn_model = CNN_model
        self.lstm = keras_layers.LSTM(units=64, return_state=True, dropout=0.3)
        self.dense = keras_layers.Dense(num_classes, activation="softmax")

    def call(self, input):
        """Placeholder forward pass: not implemented, returns ``None``."""
        return None
However, I am unclear about what needs to occur in the call function here. I also wrote a generic CNN class like the one below:
class generic_vns_function(tf.keras.Model):
    """Stack of Conv2D + MaxPooling2D pairs followed by Flatten and Dense(1024)."""

    # Where would we use layer_units here?
    # NOTE(review): `layer_units` is accepted but never read in this class.
    def __init__(self, input_shape, layers, layer_units):
        """Create one Conv2D/MaxPooling2D pair per element of ``layers``."""
        super().__init__()
        self.convolutions = []
        # Dynamically create the convolution / max-pool pairs.
        for index in range(len(layers)):
            # NOTE(review): the filter count passed to Conv2D is the loop
            # index (0, 1, 2, ...), not a value taken from `layers`.
            conv = tf.keras.layers.Conv2D(
                index, 3, padding="same",
                input_shape=input_shape, activation="relu")
            self.convolutions.append(conv)
            pool = tf.keras.layers.MaxPooling2D((2, 2))
            self.convolutions.append(pool)
        # Flatten, then a fully-connected layer.
        self.flatten = tf.keras.layers.Flatten()
        self.dense1 = tf.keras.layers.Dense(1024, activation="relu")

    def call(self, input):
        """Run ``input`` through every stored layer, then flatten and project."""
        features = input
        for stage in self.convolutions:
            features = stage(features)
        return self.dense1(self.flatten(features))
but here the required structure makes a lot more sense to me. I am just initializing all of the layers. What do I need to do to initialize my LSTM layers?
You could write it like this:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras import Model
class LSTMModel(Model):
    """LSTM followed by a softmax Dense classifier.

    Arguments:
        num_classes: size of the softmax output.
        num_units: number of LSTM units (default 64).
        drop_prob: dropout rate passed to the LSTM (default 0.3).
    """

    def __init__(self, num_classes, num_units=64, drop_prob=0.3):
        super().__init__()
        self.num_classes = num_classes
        self.num_units = num_units
        self.drop_prob = drop_prob
        # The layers are instantiated once here and reused by every call().
        self.lstm = LSTM(units=num_units, return_state=True, dropout=drop_prob)
        self.dense = Dense(num_classes, activation="softmax")

    def call(self, x, training=True):
        """Return class probabilities computed from the LSTM output.

        The LSTM is built with ``return_state=True``, so it returns the
        output followed by its states; only the first element is used.
        """
        lstm_results = self.lstm(x, training=training)
        last_output = lstm_results[0]
        return self.dense(last_output)
And then you would use it like:
# Build a two-class model and call it directly on a random batch.
model = LSTMModel(num_classes=2)
# Random tensor of shape (32, 64, 128) -- presumably (batch, timesteps,
# features); confirm against the intended data layout.
time_series = tf.random.normal((32, 64, 128))
x_pred = model(time_series)
# loss and gradients calculations ...
It is a common TensorFlow idiom to instantiate layers when initializing a custom layer/model, and then execute their call() methods by passing data through them in your custom call implementation.

ValueError: Dimensions must be equal, but are 2 and 1 in time2vec example

I have 2 inputs and 4 outputs. I want to use the time2vec to predict the outputs. I have used the code in https://towardsdatascience.com/time2vec-for-time-series-features-encoding-a03a4f3f937e, it works for one input and one output. But when I want to use for (2 inputs and four outputs) it gives me the following error:
import numpy as np
import tensorflow as tf
from keras.layers import Dense, Dropout, Activation, Flatten, LSTM, Embedding, Input, concatenate,
Lambda
from sklearn.preprocessing import MinMaxScaler
from keras.callbacks import EarlyStopping
import keras
import random
import os
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.optimizers import *
from tensorflow.keras import backend as K
from kerashypetune import KerasGridSearch
import matplotlib.pyplot as plt
# Window size and counters used by the rest of the script.
w = 5
ts = 10
nt = 10
# NOTE(review): `percent` is not defined anywhere in the visible snippet --
# confirm where it is supposed to come from.
ntest = nt + int(percent*nt)
# Random stand-in data: inputs have a trailing dimension of 2 and the
# targets have 4 columns.
X_train = np.random.rand(90,5,2)
X_test = np.random.rand(5,5,2)
y_train = np.random.rand(90,4)
y_test = np.random.rand(5,4)
### DEFINE T2V LAYER ###
class T2V(Layer):
    """Time2Vec-style layer: concatenates a sine term with a linear term.

    All four weights are created with a leading dimension of 1:
    ``W`` and ``P`` have shape ``(1, output_dim)``, ``w`` and ``p`` have
    shape ``(1, 1)``.
    """

    def __init__(self, output_dim=None, **kwargs):
        """Store ``output_dim`` and forward remaining kwargs to ``Layer``."""
        self.output_dim = output_dim
        super(T2V, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the weights of the sine branch and the linear branch."""
        wide_shape = (1, self.output_dim)
        unit_shape = (1, 1)
        self.W = self.add_weight(
            name='W', shape=wide_shape, initializer='uniform', trainable=True)
        self.P = self.add_weight(
            name='P', shape=wide_shape, initializer='uniform', trainable=True)
        self.w = self.add_weight(
            name='w', shape=unit_shape, initializer='uniform', trainable=True)
        self.p = self.add_weight(
            name='p', shape=unit_shape, initializer='uniform', trainable=True)
        super(T2V, self).build(input_shape)

    def call(self, x):
        """Return ``concat([sin(x . W + P), w * x + p])`` along the last axis."""
        linear_part = self.w * x + self.p
        # NOTE(review): W has a first dimension of 1, so this dot product
        # presumably requires the last axis of x to have size 1 -- confirm.
        periodic_part = K.sin(K.dot(x, self.W) + self.P)
        return K.concatenate([periodic_part, linear_part], -1)
### CREATE GENERATOR FOR LSTM AND T2V ###
# Alias of the window size `w`; later passed as `dim` when the model is built.
sequence_length = w
def gen_sequence(id_df, seq_length, seq_cols):
    """Yield sliding windows of ``seq_length`` rows from ``id_df[seq_cols]``.

    Windows start at rows ``0 .. n - seq_length - 1`` (``n`` being the number
    of rows), so ``n - seq_length`` windows are produced and the window that
    would end on the final row is not emitted. Nothing is yielded when the
    frame has ``seq_length`` rows or fewer.
    """
    values = id_df[seq_cols].values
    window_count = values.shape[0] - seq_length
    for first_row in range(window_count):
        yield values[first_row:first_row + seq_length, :]
def gen_labels(id_df, seq_length, label):
    """Return the rows of ``id_df[label]`` from index ``seq_length`` onwards.

    The result is indexed with two axes, so ``label`` must select a
    two-dimensional block (for example a list of column names).
    """
    values = id_df[label].values
    return values[seq_length:, :]
### DEFINE MODEL STRUCTURES ###
def set_seed_TF2(seed):
    """Seed TensorFlow, PYTHONHASHSEED, NumPy and ``random`` with ``seed``."""
    seed_text = str(seed)
    tf.random.set_seed(seed)
    os.environ['PYTHONHASHSEED'] = seed_text
    np.random.seed(seed)
    random.seed(seed)
def T2V_NN(param, dim):
    """Build and compile a T2V -> LSTM -> Dense(2) model.

    ``param`` supplies ``'t2v_dim'``, ``'unit'``, ``'act'`` and ``'lr'``;
    ``dim`` is the number of timesteps of the ``(dim, 2)`` input.
    """
    inp = Input(shape=(dim, 2))
    encoded = T2V(param['t2v_dim'])(inp)
    recurrent = LSTM(param['unit'], activation=param['act'])(encoded)
    prediction = Dense(2)(recurrent)
    m = Model(inp, prediction)
    m.compile(loss='mse', optimizer=Adam(lr=param['lr']))
    return m
def NN(param, dim):
    """Build and compile the baseline LSTM -> Dense(2) model (no T2V layer).

    ``param`` supplies ``'unit'``, ``'act'`` and ``'lr'``; ``dim`` is the
    number of timesteps of the ``(dim, 2)`` input.
    """
    inp = Input(shape=(dim, 2))
    recurrent = LSTM(param['unit'], activation=param['act'])(inp)
    prediction = Dense(2)(recurrent)
    m = Model(inp, prediction)
    m.compile(loss='mse', optimizer=Adam(lr=param['lr']))
    return m
### PARAM GRID ###
# Hyper-parameter grid handed to KerasGridSearch; 'unit', 't2v_dim', 'lr' and
# 'act' are the keys read by the model-building functions.
param_grid = {'unit': [64,32],'t2v_dim': [128,64],'lr': [1e-2,1e-3], 'act': ['elu','relu'], 'epochs': 1,'batch_size': [512,1024]}
### FIT T2V + LSTM ###
# Early stopping on validation loss, restoring the best weights.
es = EarlyStopping(patience=5, verbose=0, min_delta=0.001, monitor='val_loss', mode='auto',
restore_best_weights=True)
# Model factory: receives one parameter combination and builds the T2V model.
hypermodel = lambda x: T2V_NN(param=x, dim=sequence_length)
kgs_t2v = KerasGridSearch(hypermodel, param_grid, monitor='val_loss', greater_is_better=False,
tuner_verbose=1)
# Use set_seed_TF2 (seed 33) as the seeding function for the search.
kgs_t2v.set_seed(set_seed_TF2, seed=33)
kgs_t2v.search(X_train, y_train, validation_split=0.2, callbacks=[es], shuffle=False)
But when I run the model, I've got this error :
ValueError: Dimensions must be equal, but are 2 and 1 for '{{node t2v_2/MatMul}} = MatMul[T=DT_FLOAT,
transpose_a=false, transpose_b=false](t2v_2/Reshape, t2v_2/Reshape_1)' with input shapes: [?,2], [1,128].
Could you help me to solve this?
You have to change the parameters inside the T2V layer and inside your network in order to correctly match the shapes
class T2V(Layer):
    """Time2Vec-style layer whose weight shapes follow the input shape.

    With an input shape ``(batch, steps, features)``: ``W`` has shape
    ``(features, output_dim)``, ``P`` has shape ``(steps, output_dim)``, and
    ``w`` and ``p`` both have shape ``(steps, 1)``.
    """

    def __init__(self, output_dim=None, **kwargs):
        """Store ``output_dim`` and forward remaining kwargs to ``Layer``."""
        self.output_dim = output_dim
        super(T2V, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the four weights, sized from ``input_shape``."""
        steps = input_shape[1]
        features = input_shape[-1]
        self.W = self.add_weight(
            name='W', shape=(features, self.output_dim),
            initializer='uniform', trainable=True)
        self.P = self.add_weight(
            name='P', shape=(steps, self.output_dim),
            initializer='uniform', trainable=True)
        self.w = self.add_weight(
            name='w', shape=(steps, 1),
            initializer='uniform', trainable=True)
        self.p = self.add_weight(
            name='p', shape=(steps, 1),
            initializer='uniform', trainable=True)
        super(T2V, self).build(input_shape)

    def call(self, x):
        """Return ``concat([sin(x . W + P), w * x + p])`` along the last axis."""
        linear_part = self.w * x + self.p
        periodic_part = K.sin(K.dot(x, self.W) + self.P)
        return K.concatenate([periodic_part, linear_part], -1)
Then create a dummy example:
# Sizes of the dummy problem: 90 samples, 5 timesteps, 2 inputs, 4 outputs.
n_sample = 90
timesteps = 5
feat_inp = 2
feat_out = 4
# Uniform random inputs of shape (n_sample, timesteps, feat_inp) and
# targets of shape (n_sample, feat_out).
X = np.random.uniform(0,1, (n_sample, timesteps, feat_inp))
y = np.random.uniform(0,1, (n_sample, feat_out))
def T2V_NN():
    """Build and compile a T2V(32) -> LSTM(8) -> Dense(feat_out) model.

    Reads the module-level ``timesteps``, ``feat_inp`` and ``feat_out``.
    """
    inp = Input(shape=(timesteps, feat_inp))
    encoded = T2V(32)(inp)
    recurrent = LSTM(8)(encoded)
    prediction = Dense(feat_out)(recurrent)
    m = Model(inp, prediction)
    m.compile(loss='mse', optimizer='adam')
    return m
# Build the model and train it on the dummy data for 3 epochs.
model = T2V_NN()
model.fit(X,y, epochs=3)

How to make a Keras Dense Layer deal with 3D tensor as input for this Softmax Fully Connected Layer?

I am working on a custom problem, and I have to change the fully connected layer (Dense with softmax). My model code is something like this (with the Keras framework):
.......
# Elided excerpt from the asker's model: `x`, `num_classes` and `base_model`
# are defined in the parts omitted by the dotted lines.
batch_size = 8
# Random batch of shape [batch_size, 1024, 256].
inputs = tf.random.uniform(shape=[batch_size,1024,256],dtype=tf.dtypes.float32)
preds = Dense(num_classes,activation='softmax')(x) #final layer with softmax activation
....
model = Model(inputs=base_model.input,outputs=preds)
So, I have to change the code of the Dense layer so that it outputs a tensor of probabilities with shape [batch_size, 1024, num_classes], without using a for loop; it needs to be efficient rather than time-consuming.
The Dense code version that i want to change:
class Dense(Layer):
    """Regular densely-connected NN layer.

    `Dense` implements the operation
    `output = activation(dot(input, kernel) + bias)`, where `kernel` is a
    weights matrix of shape `(input_dim, units)` created by the layer,
    `bias` is a vector of shape `(units,)` created only when `use_bias` is
    `True`, and `activation` is the function passed as the `activation`
    argument.

    Only the last axis of the input is replaced: `compute_output_shape`
    returns the input shape with its final entry set to `units`.

    # Example

    ```python
        # as first layer in a sequential model:
        model = Sequential()
        model.add(Dense(32, input_shape=(16,)))
        # now the model will take as input arrays of shape (*, 16)
        # and output arrays of shape (*, 32)

        # after the first layer, you don't need to specify
        # the size of the input anymore:
        model.add(Dense(32))
    ```

    # Arguments
        units: Positive integer, dimensionality of the output space.
        activation: Activation function to use
            (see [activations](../activations.md)).
            If nothing is specified, no activation is applied
            (ie. "linear" activation: `a(x) = x`).
        use_bias: Boolean, whether the layer uses a bias vector.
        kernel_initializer: Initializer for the `kernel` weights matrix
            (see [initializers](../initializers.md)).
        bias_initializer: Initializer for the bias vector
            (see [initializers](../initializers.md)).
        kernel_regularizer: Regularizer function applied to
            the `kernel` weights matrix
            (see [regularizer](../regularizers.md)).
        bias_regularizer: Regularizer function applied to the bias vector
            (see [regularizer](../regularizers.md)).
        activity_regularizer: Regularizer function applied to
            the output of the layer (its "activation").
            (see [regularizer](../regularizers.md)).
        kernel_constraint: Constraint function applied to
            the `kernel` weights matrix
            (see [constraints](../constraints.md)).
        bias_constraint: Constraint function applied to the bias vector
            (see [constraints](../constraints.md)).

    # Input shape
        nD tensor with shape: `(batch_size, ..., input_dim)`.
        The most common situation would be
        a 2D input with shape `(batch_size, input_dim)`.

    # Output shape
        nD tensor with shape: `(batch_size, ..., units)`.
        For instance, for a 2D input with shape `(batch_size, input_dim)`,
        the output would have shape `(batch_size, units)`.
    """

    def __init__(self, units,
                 activation=None,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        # Convenience: translate `input_dim=n` into `input_shape=(n,)` unless
        # an explicit `input_shape` was also supplied.
        only_input_dim = 'input_dim' in kwargs and 'input_shape' not in kwargs
        if only_input_dim:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        super(Dense, self).__init__(**kwargs)

        self.units = units
        self.activation = activations.get(activation)
        self.use_bias = use_bias
        # Resolve identifiers into initializer / regularizer / constraint
        # objects through the corresponding `get` helpers.
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)
        self.input_spec = InputSpec(min_ndim=2)
        self.supports_masking = True

    def build(self, input_shape):
        """Create `kernel` (and `bias` when enabled) from the last input axis."""
        assert len(input_shape) >= 2
        last_dim = input_shape[-1]

        self.kernel = self.add_weight(shape=(last_dim, self.units),
                                      initializer=self.kernel_initializer,
                                      name='kernel',
                                      regularizer=self.kernel_regularizer,
                                      constraint=self.kernel_constraint)
        if not self.use_bias:
            self.bias = None
        else:
            self.bias = self.add_weight(shape=(self.units,),
                                        initializer=self.bias_initializer,
                                        name='bias',
                                        regularizer=self.bias_regularizer,
                                        constraint=self.bias_constraint)
        # Pin the size of the last axis for subsequent inputs.
        self.input_spec = InputSpec(min_ndim=2, axes={-1: last_dim})
        self.built = True

    def call(self, inputs):
        """Return `activation(dot(inputs, kernel) + bias)`."""
        projected = K.dot(inputs, self.kernel)
        if self.use_bias:
            projected = K.bias_add(projected, self.bias)
        if self.activation is None:
            return projected
        return self.activation(projected)

    def compute_output_shape(self, input_shape):
        """Return `input_shape` with its last entry replaced by `units`."""
        assert input_shape and len(input_shape) >= 2
        assert input_shape[-1]
        dims = list(input_shape)
        return tuple(dims[:-1] + [self.units])

    def get_config(self):
        """Return the base layer config extended with this layer's settings."""
        own_config = {
            'units': self.units,
            'activation': activations.serialize(self.activation),
            'use_bias': self.use_bias,
            'kernel_initializer': initializers.serialize(self.kernel_initializer),
            'bias_initializer': initializers.serialize(self.bias_initializer),
            'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
            'bias_regularizer': regularizers.serialize(self.bias_regularizer),
            'activity_regularizer': regularizers.serialize(self.activity_regularizer),
            'kernel_constraint': constraints.serialize(self.kernel_constraint),
            'bias_constraint': constraints.serialize(self.bias_constraint)
        }
        # Base entries come first; this layer's entries win on a key clash.
        merged = dict(super(Dense, self).get_config())
        merged.update(own_config)
        return merged
There are three different ways in which this can be done (that I can think of). If you want to have a single dense layer, that maps a vector of 256 elements to a vector of num_classes elements, and apply it all across your batch of data (that is, use the same 256 x num_classes matrix of weights for every sample), then you don't need to do anything special, just use a regular Dense layer:
import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense
# NOTE: batch_size is not used below; the Input leaves the batch axis open.
batch_size = 8
num_classes = 10
inp = Input(shape=(1024, 256))
# One Dense layer applied to the last axis of the input.
layer = Dense(num_classes, activation='softmax')
out = layer(inp)
print(out.shape)
# (None, 1024, 10)
print(layer.count_params())
# 2570
# i.e. 256 * 10 kernel weights + 10 biases
Another way would be to have a single huge Dense layer that takes all 1024 * 256 values in at the same time and produces all 1024 * num_classes values at the output, that is, a layer with a matrix of weights with shape (1024 * 256) x (1024 * num_classes) (on the order of gigabytes of memory!). This is easy to do too, although it seems unlikely to be what you need:
import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.layers import Flatten, Dense, Reshape, Softmax
# NOTE: batch_size is not used below; the Input leaves the batch axis open.
batch_size = 8
num_classes = 10
inp = Input(shape=(1024, 256))
# Collapse the (1024, 256) axes into a single vector per sample.
res = Flatten()(inp)
# This takes _a lot_ of memory!
layer = Dense(1024 * num_classes, activation=None)
out_res = layer(res)
# Apply softmax after reshaping
out_preact = Reshape((-1, num_classes))(out_res)
out = Softmax()(out_preact)
print(out.shape)
# (None, 1024, 10)
print(layer.count_params())
# 2684364800
# i.e. (1024 * 256) * (1024 * 10) kernel weights + 1024 * 10 biases
Finally, you may want to have a set of 1024 weight matrices, each one applied to the corresponding sample in the input, which would imply an array of weights with shape (1024, 256, num_classes). I don't think this can be done with one of the standard Keras layers (or I don't know how to) [1], but it's easy enough to write a custom layer based on Dense to do that:
import tensorflow as tf
from tensorflow.keras.layers import Dense, InputSpec
class Dense2D(Dense):
    """Dense variant with a separate weight matrix per position of axis -2.

    For an input whose last two axes are ``(dim1, dim2)`` the kernel has
    shape ``(dim1, dim2, units)`` and the optional bias has shape
    ``(dim1, units)``.
    """

    def __init__(self, *args, **kwargs):
        """Forward every argument unchanged to ``Dense``."""
        super(Dense2D, self).__init__(*args, **kwargs)

    def build(self, input_shape):
        """Create the per-position kernel and, when enabled, the bias."""
        assert len(input_shape) >= 3
        rows, cols = input_shape[-2], input_shape[-1]
        self.kernel = self.add_weight(shape=(rows, cols, self.units),
                                      initializer=self.kernel_initializer,
                                      name='kernel',
                                      regularizer=self.kernel_regularizer,
                                      constraint=self.kernel_constraint)
        if not self.use_bias:
            self.bias = None
        else:
            self.bias = self.add_weight(shape=(rows, self.units),
                                        initializer=self.bias_initializer,
                                        name='bias',
                                        regularizer=self.bias_regularizer,
                                        constraint=self.bias_constraint)
        # Pin the sizes of the last two axes for subsequent inputs.
        self.input_spec = InputSpec(min_ndim=3, axes={-2: rows, -1: cols})
        self.built = True

    def call(self, inputs):
        """Contract axis ``j`` of the input with each position's own matrix."""
        # '...ij,ijk->...ik': position i uses its own (j, k) weight matrix.
        result = tf.einsum('...ij,ijk->...ik', inputs, self.kernel)
        if self.use_bias:
            result = result + self.bias
        if self.activation is None:
            return result
        return self.activation(result)

    def compute_output_shape(self, input_shape):
        """Return ``input_shape`` with its last entry replaced by ``units``."""
        assert input_shape and len(input_shape) >= 3
        assert input_shape[-1]
        dims = list(input_shape)
        return tuple(dims[:-1] + [self.units])
You would then use it like this:
import tensorflow as tf
from tensorflow.keras import Input
# NOTE: batch_size is not used below; the Input leaves the batch axis open.
batch_size = 8
num_classes = 10
inp = Input(shape=(1024, 256))
layer = Dense2D(num_classes, activation='softmax')
out = layer(inp)
print(out.shape)
# (None, 1024, 10)
print(layer.count_params())
# 2631680
# i.e. 1024 * 256 * 10 kernel weights + 1024 * 10 biases
[1]: As the user "today" points out in the comments, you can actually use a LocallyConnected1D layer to do the same thing that I tried to do with my Dense2D layer. It is as simple as this:
import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.layers import LocallyConnected1D
# NOTE: batch_size is not used below; the Input leaves the batch axis open.
batch_size = 8
num_classes = 10
inp = Input(shape=(1024, 256))
# num_classes filters with a kernel size of 1.
layer = LocallyConnected1D(num_classes, 1, activation='softmax')
out = layer(inp)
print(out.shape)
# (None, 1024, 10)
print(layer.count_params())
# 2631680

Is there a tensorflow keras layer that is a wrapper for a stack of Dense layers?

For example, this is trivial, but is there a layer for this? It is not really a convolution ... there is one "Dense layer" (set of weights) per data point.
In [266]: X = np.random.randn(10, 3); W = np.random.randn(10, 3, 4); (X[:, :, None] * W).sum(axis=1).shape
Out[266]: (10, 4)
Create your own layer:
Warning: works only with fixed batch size, you need to define batch_shape or batch_input_shape in your models!!!!
class SampleDense(Layer):
    """Dense-like layer holding one weight matrix per entry of the input.

    The kernel shape is the full ``input_shape`` with ``units`` appended, so
    every axis of ``input_shape`` must be a concrete size when the weight is
    created.
    """

    def __init__(self, units, **kwargs):
        """Store ``units`` and forward remaining kwargs to ``Layer``."""
        self.units = units
        super(SampleDense, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create a kernel of shape ``input_shape + (units,)``."""
        self.kernel = self.add_weight(name='kernel',
                                      shape=input_shape + (self.units,),
                                      initializer='uniform',
                                      trainable=True)
        self.built = True

    def call(self, inputs):
        """Multiply element-wise by the kernel and sum over the old last axis."""
        # Add a trailing axis so the inputs broadcast against `units`.
        expanded = K.expand_dims(inputs, axis=-1)
        return K.sum(expanded * self.kernel, axis=-2)

    def compute_output_shape(self, input_shape):
        """Return ``input_shape`` with its last entry replaced by ``units``."""
        return input_shape[:-1] + (self.units,)

Propagating through a custom layer in tensorflow just once

Given a custom layer in tensorflow, is it possible to let the model use it just during one epoch? The layer may just be ignored for all other epochs or simply be an identity.
For example: given data, I would like the layer to simply double the given data. The other layers should work normally. How would one do that?
def do_stuff(data):
    """Return ``2 * data``."""
    doubled = 2 * data
    return doubled
def run_once(data):
    """Wrap ``do_stuff`` in a ``tf.py_func`` op producing a float32 tensor.

    The op is declared stateless. Its name must be a valid TensorFlow op
    name, which cannot contain spaces, so ``'run_once'`` is used instead of
    the previous ``'I run once'``.
    """
    return tf.py_func(do_stuff,
                      [data],
                      'float32',
                      stateful=False,
                      name='run_once')
class CustomLayer(Layer):
    """Layer that maps ``run_once`` over its input and fixes the static shape.

    ``output_dim`` is indexed as ``output_dim[1]`` and ``output_dim[0]`` to
    fill the two middle entries of the declared output shape.
    """

    def __init__(self, output_dim, **kwargs):
        """Create a non-trainable layer.

        ``trainable`` is passed to ``Layer.__init__`` through kwargs: the base
        initializer assigns ``trainable`` itself, so a value set on the
        instance before calling it would be overwritten. An explicit
        ``trainable=...`` from the caller still takes precedence.
        """
        kwargs.setdefault('trainable', False)
        super(CustomLayer, self).__init__(**kwargs)
        self.output_dim = output_dim

    def call(self, x):
        """Apply ``run_once`` to each element of ``x`` along the first axis."""
        res = tf.map_fn(run_once, x)
        # The mapped op does not carry a static shape; restore it from the
        # first and last input axes plus the configured output dimensions.
        res.set_shape([x.shape[0],
                       self.output_dim[1],
                       self.output_dim[0],
                       x.shape[-1]])
        return res
inputs = Input(shape=(224, 224, 1))
# NOTE(review): `x` is used as an argument here before this snippet assigns
# it, and the Lambda layer is constructed but not applied to `inputs` --
# confirm the intended wiring.
x = Lambda(preprocess_input(x), input_shape=(224, 224, 1), output_shape=(224, 224, 3))
outputs = Dense(1)(x)
# NOTE(review): Model is given `input=`/`output=` here, whereas other
# snippets use `inputs=`/`outputs=` -- verify against the Keras version used.
model = Model(input=inputs, output=outputs)
output = model(x)
Interesting question. To execute a TF operation just in the first epoch, one could use tf.cond and tf.control_dependencies to check/update the value of a boolean tensor. For example, your custom layer could be implemented as follows:
class CustomLayer(Layer):
    """Layer that applies ``run_once`` only while a boolean flag is set.

    The flag starts as ``True`` and is assigned ``False`` as a control
    dependency of the returned tensor.
    """

    def __init__(self, **kwargs):
        """Forward every keyword argument unchanged to ``Layer``."""
        super(CustomLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the boolean flag variable, initially ``True``."""
        self.first_epoch = tf.Variable(True)

    def call(self, x):
        """Return ``run_once(x)`` while the flag is set, otherwise ``x``."""
        def apply_once():
            return run_once(x)

        def passthrough():
            return x

        res = tf.cond(self.first_epoch,
                      true_fn=apply_once,
                      false_fn=passthrough)
        # Ordering: compute the result first, then clear the flag, then hand
        # back an identity that depends on the flag update.
        # NOTE(review): the flag is cleared as soon as this op has executed
        # once, which presumably means the first batch rather than the whole
        # first epoch -- confirm.
        with tf.control_dependencies([res]):
            clear_flag = self.first_epoch.assign(False)
            with tf.control_dependencies([clear_flag]):
                res = tf.identity(res)
        return res
To validate that this layer works as expected, define run_once as:
def run_once(data):
    """Return an identity of ``data`` that first prints ``'First epoch'``."""
    announce = tf.print('First epoch')
    # The identity is created under the print op's control dependency so the
    # message is emitted whenever the returned tensor is evaluated.
    with tf.control_dependencies([announce]):
        return tf.identity(data)