ValueError: Dimensions must be equal, but are 2 and 1 in time2vec example - tensorflow

I have 2 inputs and 4 outputs, and I want to use time2vec to predict the outputs. I used the code from https://towardsdatascience.com/time2vec-for-time-series-features-encoding-a03a4f3f937e, and it works for one input and one output. But when I try to use it for 2 inputs and 4 outputs, I get an error. Here is my code:
import numpy as np
import tensorflow as tf
from keras.layers import Dense, Dropout, Activation, Flatten, LSTM, Embedding, Input, concatenate, Lambda
from sklearn.preprocessing import MinMaxScaler
from keras.callbacks import EarlyStopping
import keras
import random
import os
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.optimizers import *
from tensorflow.keras import backend as K
from kerashypetune import KerasGridSearch
import matplotlib.pyplot as plt

w = 5
ts = 10
nt = 10
percent = 0.5  # `percent` is not defined in the original snippet; assumed here so the next line runs
ntest = nt + int(percent*nt)

X_train = np.random.rand(90,5,2)
X_test = np.random.rand(5,5,2)
y_train = np.random.rand(90,4)
y_test = np.random.rand(5,4)
""" ### DEFINE T2V LAYER ###
class T2V(Layer):
def __init__(self, output_dim=None, **kwargs):
self.output_dim = output_dim
super(T2V, self).__init__(**kwargs)
def build(self, input_shape):
self.W = self.add_weight(name='W', shape=(1, self.output_dim), initializer='uniform',
trainable=True)
self.P = self.add_weight(name='P',shape=(1,
self.output_dim),initializer='uniform',trainable=True)
self.w = self.add_weight(name='w',shape=(1, 1),initializer='uniform', trainable=True)
self.p = self.add_weight(name='p',shape=(1, 1),initializer='uniform',trainable=True)
super(T2V, self).build(input_shape)
def call(self, x):
original = self.w * x + self.p
sin_trans = K.sin(K.dot(x, self.W) + self.P)
return K.concatenate([sin_trans, original], -1)
### CREATE GENERATOR FOR LSTM AND T2V ###
sequence_length = w

def gen_sequence(id_df, seq_length, seq_cols):
    data_matrix = id_df[seq_cols].values
    num_elements = data_matrix.shape[0]
    for start, stop in zip(range(0, num_elements-seq_length), range(seq_length, num_elements)):
        yield data_matrix[start:stop, :]

def gen_labels(id_df, seq_length, label):
    data_matrix = id_df[label].values
    num_elements = data_matrix.shape[0]
    return data_matrix[seq_length:num_elements, :]
### DEFINE MODEL STRUCTURES ###
def set_seed_TF2(seed):
    tf.random.set_seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    random.seed(seed)

def T2V_NN(param, dim):
    inp = Input(shape=(dim,2))
    x = T2V(param['t2v_dim'])(inp)
    x = LSTM(param['unit'], activation=param['act'])(x)
    x = Dense(2)(x)
    m = Model(inp, x)
    m.compile(loss='mse', optimizer=Adam(lr=param['lr']))
    return m

def NN(param, dim):
    inp = Input(shape=(dim,2))
    x = LSTM(param['unit'], activation=param['act'])(inp)
    x = Dense(2)(x)
    m = Model(inp, x)
    m.compile(loss='mse', optimizer=Adam(lr=param['lr']))
    return m
### PARAM GRID ###
param_grid = {'unit': [64,32], 't2v_dim': [128,64], 'lr': [1e-2,1e-3], 'act': ['elu','relu'], 'epochs': 1, 'batch_size': [512,1024]}
### FIT T2V + LSTM ###
es = EarlyStopping(patience=5, verbose=0, min_delta=0.001, monitor='val_loss', mode='auto',
                   restore_best_weights=True)

hypermodel = lambda x: T2V_NN(param=x, dim=sequence_length)

kgs_t2v = KerasGridSearch(hypermodel, param_grid, monitor='val_loss', greater_is_better=False,
                          tuner_verbose=1)
kgs_t2v.set_seed(set_seed_TF2, seed=33)
kgs_t2v.search(X_train, y_train, validation_split=0.2, callbacks=[es], shuffle=False)
But when I run the model, I get this error:

ValueError: Dimensions must be equal, but are 2 and 1 for '{{node t2v_2/MatMul}} = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false](t2v_2/Reshape, t2v_2/Reshape_1)' with input shapes: [?,2], [1,128].

Could you help me solve this?

You have to change the weight shapes inside the T2V layer, and the output dimension of your network, so that they correctly match your data (2 input features and 4 outputs):
class T2V(Layer):
    def __init__(self, output_dim=None, **kwargs):
        self.output_dim = output_dim
        super(T2V, self).__init__(**kwargs)

    def build(self, input_shape):
        self.W = self.add_weight(name='W', shape=(input_shape[-1], self.output_dim),
                                 initializer='uniform', trainable=True)
        self.P = self.add_weight(name='P', shape=(input_shape[1], self.output_dim),
                                 initializer='uniform', trainable=True)
        self.w = self.add_weight(name='w', shape=(input_shape[1], 1),
                                 initializer='uniform', trainable=True)
        self.p = self.add_weight(name='p', shape=(input_shape[1], 1),
                                 initializer='uniform', trainable=True)
        super(T2V, self).build(input_shape)

    def call(self, x):
        original = self.w * x + self.p
        sin_trans = K.sin(K.dot(x, self.W) + self.P)
        return K.concatenate([sin_trans, original], -1)
Create a dummy example:

n_sample = 90
timesteps = 5
feat_inp = 2
feat_out = 4

X = np.random.uniform(0, 1, (n_sample, timesteps, feat_inp))
y = np.random.uniform(0, 1, (n_sample, feat_out))

def T2V_NN():
    inp = Input(shape=(timesteps, feat_inp))
    x = T2V(32)(inp)
    x = LSTM(8)(x)
    x = Dense(feat_out)(x)
    m = Model(inp, x)
    m.compile(loss='mse', optimizer='adam')
    return m

model = T2V_NN()
model.fit(X, y, epochs=3)
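To see why this fixes the original error: K.dot(x, self.W) contracts the last axis of x (the feature axis, size 2 here) with the first axis of W, so W must have shape (input_shape[-1], output_dim) rather than (1, output_dim); that mismatch is exactly what the message "Dimensions must be equal, but are 2 and 1 ... [?,2], [1,128]" was reporting. A quick shape check, assuming the corrected layer and the variables defined above:

# Minimal shape check for the corrected T2V layer.
inp = Input(shape=(timesteps, feat_inp))  # (None, 5, 2)
out = T2V(32)(inp)                        # sin part (None, 5, 32) concatenated with linear part (None, 5, 2)
print(out.shape)                          # expected: (None, 5, 34)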

Related

The model cannot be compiled because it has no loss to optimize

I wrote a VAE model whose posterior is a GMM, and I used self.add_loss to define the VAE loss, but an error occurs when I fit my model:

ValueError: The model cannot be compiled because it has no loss to optimize.

Here is my code:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.datasets import mnist
import matplotlib.pyplot as plt
from tensorflow.keras import layers
import tensorflow_probability as tfp
import numpy as np
tfd = tfp.distributions
tf.test.is_gpu_available()
# data
(x_train, x_labels), (x_val, x_val_labels) = mnist.load_data()
x_train = x_train.reshape(60000, 784).astype("float32") / 255.
x_val = x_val.reshape(10000, 784).astype("float32") / 255.
x_train[x_train >= 0.5] = 1.
x_train[x_train < 0.5] = 0.
x_val[x_val >= 0.5] = 1.
x_val[x_val < 0.5] = 0.
# from softmax to one_hot
def props_to_onehot(props):
    if isinstance(props, list):
        props = np.array(props)
    a = np.argmax(props, axis=1)
    b = np.zeros((len(a), props.shape[1]))
    b[np.arange(len(a)), a] = 1
    return b
# reparameterization
class Sampling(layers.Layer):
    """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""
    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon

class Encoder(layers.Layer):
    def __init__(self, latent_dim, base_depth, components, name='encoder', **kwargs):
        """
        latent_dim: the dimensionality of the latent variable z (also the dim of u and Σ)
        base_depth: base units of Dense
        components: the number of Gaussian distributions; in this case we set components = 10
        """
        super(Encoder, self).__init__(name=name, **kwargs)
        self.latent_size = latent_dim
        self.base_depth = base_depth
        self.components = components
        # shared structure of encoder
        self.dense1 = Dense(8 * self.base_depth, activation='relu', name='1')
        self.dropout1 = tf.keras.layers.Dropout(0.2)
        self.dense2 = Dense(4 * self.base_depth, activation='relu', name='2')
        self.dropout2 = tf.keras.layers.Dropout(0.2)
        self.dense3 = Dense(4 * self.base_depth, activation='relu', name='3')
        self.dense4 = Dense(2 * self.base_depth, activation='relu', name='4')
        self.dense5 = Dense(2 * self.base_depth, activation='relu', name='5')
        # the output parameters of the encoder, including {pi, u, Σ}
        self.parameters = Dense(self.components + self.components * 2 * self.latent_size, name='6')
        self.sampling = Sampling()

    def call(self, inputs):
        # shared structure output
        x = self.dense1(inputs)
        x = self.dropout1(x)
        x = self.dense2(x)
        x = self.dropout2(x)
        x = self.dense3(x)
        x = self.dense4(x)
        x = self.dense5(x)
        # meaningful parameters
        parameters = self.parameters(x)
        pi, _ = tf.split(parameters, [self.components, 10 * 2 * self.latent_size], axis=-1)
        pi = tf.nn.softmax(pi)
        pi = props_to_onehot(pi)
        batch_size_int = tf.shape(pi)[0].numpy()
        batch_list = []
        for i in range(batch_size_int):
            index = np.argmax(pi[0])
            batch_list.append(parameters[0][self.components + index * 2 * self.latent_size + 1:self.components + (
                index + 1) * 2 * self.latent_size + 1])
        batch_list = np.array(batch_list)  # (batch_size, 2*latent_size)
        # (batch_size, latent_size); (batch_size, latent_size)
        z_mean, z_log_var = tf.split(batch_list, [self.latent_size, self.latent_size], axis=-1)
        z = self.sampling((z_mean, z_log_var))
        kl_loss = -0.5 * tf.reduce_mean(z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + 1)
        self.add_loss(kl_loss)
        return z_mean, z_log_var, z
class Decoder(layers.Layer):
    def __init__(self, base_depth, name="decoder", **kwargs):
        super(Decoder, self).__init__(name=name, **kwargs)
        self.base_depth = base_depth
        self.dense1 = Dense(self.base_depth)
        self.dense2 = Dense(2 * self.base_depth, activation='relu')
        self.dense3 = Dense(4 * self.base_depth, activation='relu')
        self.dropout1 = tf.keras.layers.Dropout(0.2)
        self.dense4 = Dense(4 * self.base_depth, activation='relu')
        self.dense5 = Dense(8 * self.base_depth, activation='relu')
        self.dropout2 = tf.keras.layers.Dropout(0.2)
        # no activation
        self.dense_out = Dense(784)

    def call(self, inputs):
        x = self.dense1(inputs)
        x = self.dense2(x)
        x = self.dense3(x)
        x = self.dropout1(x)
        x = self.dense4(x)
        x = self.dense5(x)
        x = self.dropout2(x)
        x = self.dense_out(x)
        # shape = (B, 784)
        return x

class GMM_VAE_Posterior(tf.keras.Model):
    def __init__(self, latent_dim, base_depth, components, name='auto_encoder', **kwargs):
        super(GMM_VAE_Posterior, self).__init__(name=name, **kwargs)
        self.latent_dim = latent_dim
        self.base_depth = base_depth
        self.components = components
        self.encoder = Encoder(self.latent_dim, self.base_depth, self.components)
        self.decoder = Decoder(self.base_depth)

    def call(self, inputs):
        z_mean, z_log_var, z = self.encoder(inputs)
        out = self.decoder(z)  # (batch_size, 784)
        reconstructions_error = tf.nn.sigmoid_cross_entropy_with_logits(labels=inputs, logits=out)
        reconstructions_error = tf.reduce_sum(reconstructions_error, axis=-1)
        reconstructions_error = tf.reduce_mean(reconstructions_error)
        self.add_loss(reconstructions_error)
        # shape: (batch_size, 784)
        return out

vae_gmm = GMM_VAE_Posterior(16, 64, 10)
vae_gmm.compile(optimizer=tf.keras.optimizers.Adam())
vae_gmm.fit(x_train, x_train, epochs=5, batch_size=64)  # error
In my view, the computation graph of my model is not complete, so the model cannot backpropagate. But that is just my guess.
When compiling the model, you must fill in the loss parameter. Since you already added the loss another way (via add_loss), simply set it to None:
vae_gmm.compile(optimizer=tf.keras.optimizers.Adam(), loss = None)
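As a minimal sketch of the pattern (a standalone toy model, not the poster's VAE): a model that registers its own loss via add_loss can be compiled with loss=None and fit without targets.

import tensorflow as tf

# Toy model that supplies its own loss via add_loss, so compile(loss=None) works.
class SelfLossModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.dense = tf.keras.layers.Dense(4)

    def call(self, inputs):
        out = self.dense(inputs)
        # any differentiable scalar works; here an L2 penalty on the output
        self.add_loss(tf.reduce_mean(tf.square(out)))
        return out

model = SelfLossModel()
model.compile(optimizer=tf.keras.optimizers.Adam(), loss=None)
x = tf.random.normal((32, 8))
model.fit(x, epochs=1)  # no y needed: the only loss comes from add_loss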

Keras Inception V3 predict image not working

Following what I learnt from Jerry Kurata on Pluralsight, I'm trying to recognize birds.
My dataset structure is: (directory-tree screenshot omitted)
My model training code is:
import glob
import matplotlib.pyplot as plt
from keras import backend as K
import tensorflow as tf

with K.tf.device("/device:GPU:0"):
    config = tf.ConfigProto(intra_op_parallelism_threads=4,
                            inter_op_parallelism_threads=4, allow_soft_placement=True,
                            device_count={'CPU': 1, 'GPU': 1})
    session = tf.Session(config=config)
    K.set_session(session)

from keras.callbacks import EarlyStopping
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
# "/device:GPU:0"
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

def get_num_files(path):
    if not os.path.exists(path):
        return 0
    return sum([len(files) for r, d, files in os.walk(path)])

def get_num_subfolders(path):
    if not os.path.exists(path):
        return 0
    return sum([len(d) for r, d, files in os.walk(path)])

def create_img_generator():
    return ImageDataGenerator(
        preprocessing_function=preprocess_input,
        rotation_range=30,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True
    )
Image_width, Image_height = 299, 299
Training_Epochs = 1
Batch_Size = 32
Number_FC_Neurons = 1024

train_dir = '.../birds/train'
validate_dir = '.../birds/validation'
num_train_samples = get_num_files(train_dir)
num_classes = get_num_subfolders(train_dir)
num_validate_samples = get_num_files(validate_dir)
num_epoch = Training_Epochs
batch_size = Batch_Size

train_image_gen = create_img_generator()
test_image_gen = create_img_generator()

train_generator = train_image_gen.flow_from_directory(
    train_dir,
    target_size=(Image_width, Image_height),
    batch_size=batch_size,
    seed=42
)
validation_generator = test_image_gen.flow_from_directory(
    validate_dir,
    target_size=(Image_width, Image_height),
    batch_size=batch_size,
    seed=42
)

Inceptionv3_model = InceptionV3(weights='imagenet', include_top=False)
print('Inception v3 model without last FC loaded')

x = Inceptionv3_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(Number_FC_Neurons, activation='relu')(x)
predictions = Dense(num_classes, activation='softmax')(x)
model = Model(inputs=Inceptionv3_model.input, outputs=predictions)
print(model.summary())

print('\nFine tuning existing model')
Layers_To_Freeze = 172
for layer in model.layers[:Layers_To_Freeze]:
    layer.trainable = False
for layer in model.layers[Layers_To_Freeze:]:
    layer.trainable = True

model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='binary_crossentropy', metrics=['accuracy'])

cbk_early_stopping = EarlyStopping(monitor='val_acc', mode='max')

history_transfer_learning = model.fit_generator(
    train_generator,
    steps_per_epoch=num_train_samples,
    epochs=num_epoch,
    validation_data=validation_generator,
    validation_steps=num_validate_samples,
    class_weight='auto',
    callbacks=[cbk_early_stopping]
)

model.save('incepv3_transfer.h5', overwrite=True, include_optimizer=True)
My detector is:
from keras.models import load_model
from keras.optimizers import SGD
from keras.preprocessing import image
from keras.applications.inception_v3 import preprocess_input
import matplotlib.pyplot as plt
import numpy as np
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

class Detector:
    def __init__(self, model_path):
        self.model = load_model(model_path)
        print('input shape')  # output is always (None, None, None, 3), this should be wrong
        print(self.model.layers[0].input_shape)
        # self.model.summary()
        # self.model.compile(loss='binary_crossentropy', optimizer=SGD(lr=0.0001, momentum=0.9), metrics=['accuracy'])

    def preprocess_input(self, x):
        y = np.copy(x)
        y /= 255.
        y -= 0.5
        y *= 2.
        return y

    def load_image(self, img_path, show=False):
        img = image.load_img(img_path, target_size=(299, 299))
        img_tensor = image.img_to_array(img)  # (height, width, channels)
        img_tensor = np.expand_dims(img, axis=0)  # (1, height, width, channels); the model expects (batch_size, height, width, channels)
        # img_tensor /= 255.  # imshow expects values in the range [0, 1]
        img_tensor = preprocess_input(img_tensor)
        if show:
            plt.imshow(img_tensor[0])
            plt.axis('off')
            plt.show()
        return img_tensor

    def detect(self, img_path):
        img = self.load_image(img_path, True)
        classes = self.model.predict(img)
        return classes
(The load_image method is adapted from this link.)
And here is how I use them to predict whether an image has a bird or not:
from keras.models import Model
from detector import Detector
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
model_path = 'incepv3_transfer.h5'
detective = Detector(model_path)
bird_img = 'b1.jpeg'
classes = detective.detect(bird_img)
print(classes)
bird_img = 'dog1.jpg'
classes = detective.detect(bird_img)
print(classes)
The output is always:
[[1.]]
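A quick sanity check worth running here (hypothetical snippet, not from the original post): a softmax over a single unit always outputs 1.0, and [[1.]] has shape (1, 1), which suggests the final Dense layer was built with one unit. Printing num_classes and the model's output shape before training would confirm whether get_num_subfolders counted the classes as expected:

# Hypothetical sanity check: verify the classifier head has one unit per class.
num_classes = get_num_subfolders(train_dir)
print('num_classes:', num_classes)          # expected: the number of bird species
print('model output:', model.output_shape)  # expected: (None, num_classes)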

Gaussian Process Regression in Tensorflow 2.0 leads to no gradients?

The following code is basically from the documentation, slightly converted to run in tensorflow 2.0. The gradients are all None. I'm not sure if this is a bug or just something I am missing:
(corrected code)
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions
psd_kernels = tfp.positive_semidefinite_kernels

tf.keras.backend.set_floatx('float64')

f = lambda x: np.sin(10*x[..., 0]) * np.exp(-x[..., 0]**2)
observation_index_points = np.random.uniform(-1., 1., 50)[..., np.newaxis]
observations = f(observation_index_points) + np.random.normal(0., .05, 50)

class Model(tf.keras.models.Model):
    def __init__(self):
        super().__init__()
        self.amplitude_ = tf.Variable(np.float64(0), trainable=True)
        self.amplitude = tf.exp(self.amplitude_, name='amplitude')
        self.length_scale_ = tf.Variable(np.float64(0), trainable=True)
        self.length_scale = tf.exp(self.length_scale_, name='length_scale')
        self.kernel = psd_kernels.ExponentiatedQuadratic(self.amplitude, self.length_scale)
        self.observation_noise_variance_ = tf.Variable(np.float64(-5), trainable=True)
        self.observation_noise_variance = tf.exp(self.observation_noise_variance_, name='observation_noise_variance')

    def gp(self, observation_index_points):
        return tfd.GaussianProcess(
            kernel=self.kernel,
            index_points=observation_index_points,
            observation_noise_variance=self.observation_noise_variance)

    def call(self, observation_index_points, observations, index_points):
        return tfd.GaussianProcessRegressionModel(
            kernel=self.kernel,
            index_points=index_points,
            observation_index_points=observation_index_points,
            observations=observations,
            observation_noise_variance=self.observation_noise_variance)

optimizer = tf.keras.optimizers.Adam(learning_rate=.05)

# We can construct the posterior at a new set of `index_points` using the same
# kernel (with the same parameters, which we'll optimize below).
index_points = np.linspace(-1., 1., 100)[..., np.newaxis]

model = Model()
gprm = model(observation_index_points, observations, index_points)
gp = model.gp(observation_index_points)
gp.log_prob(observations)
samples = gprm.sample(10)

trainable_variables = [model.amplitude_, model.length_scale_, model.observation_noise_variance_]

with tf.GradientTape() as tape:
    loss = -gp.log_prob(observations)
print(loss)
g = tape.gradient(loss, trainable_variables)
print(g)
UPDATE:
The following example now works. I am wondering if there is a better pattern for organizing this flow in TF 2.0?
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

tfb = tfp.bijectors
tfd = tfp.distributions
psd_kernels = tfp.positive_semidefinite_kernels

m = 1000
n = 3
x = np.random.randn(m, n).astype(np.float32)
y = np.random.randn(m).astype(np.float32)
x_ = np.random.randn(100, n).astype(np.float32)

class GPRMatern(tf.keras.models.Model):
    def __init__(self, feature_ndims=1):
        super().__init__()
        self.kernel = psd_kernels.MaternFiveHalves()
        self.observation_noise_variance = tf.Variable(np.float32(.01), name='obs_noise_variance')

    def gprm(self, x_obs, y_obs, x):
        return tfd.GaussianProcessRegressionModel(
            kernel=self.kernel,
            index_points=x,
            observation_index_points=x_obs,
            observations=y_obs,
            observation_noise_variance=self.observation_noise_variance)

    def nll_for_train(self, x_obs, y_obs):
        gp = tfd.GaussianProcess(
            kernel=self.kernel,
            index_points=x_obs,
            observation_noise_variance=self.observation_noise_variance)
        return -tf.reduce_mean(gp.log_prob(y_obs))

class GPRExpQuad(tf.keras.models.Model):
    def __init__(self):
        super().__init__()
        self.amplitude = tf.Variable(np.float32(0.0), name='amplitude')
        self.length_scale = tf.Variable(np.float32(0.0), name='length_scale')
        self.observation_noise_variance = tf.Variable(np.float32(-5.0), name='obs_noise_variance')

    @property
    def kernel(self):
        return psd_kernels.ExponentiatedQuadratic(tf.exp(self.amplitude), tf.exp(self.length_scale))

    def nll_for_train(self, x_obs, y_obs):
        gp = tfd.GaussianProcess(
            kernel=self.kernel,
            index_points=x_obs,
            observation_noise_variance=tf.exp(self.observation_noise_variance))
        return -tf.reduce_mean(gp.log_prob(y_obs))

    def gprm(self, x_obs, y_obs, x):
        return tfd.GaussianProcessRegressionModel(
            kernel=self.kernel,
            index_points=x,
            observation_index_points=x_obs,
            observations=y_obs,
            observation_noise_variance=tf.exp(self.observation_noise_variance))

def test_model(model=GPRMatern):
    model = model()
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
    # model.fit(x, y, epochs=steps)
    for i in range(10):
        with tf.GradientTape() as tape:
            l = model.nll_for_train(x, y)
        g = tape.gradient(l, model.trainable_variables)
        optimizer.apply_gradients(zip(g, model.trainable_variables))
        print({x.name: x.numpy() for x in model.trainable_variables})

matern = GPRMatern()
expquad = GPRExpQuad()
test_matern = lambda: test_model(model=GPRMatern)
test_expquad = lambda: test_model(model=GPRExpQuad)
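As far as I can tell from the two snippets, the first version produced None gradients because self.amplitude = tf.exp(self.amplitude_) runs once, eagerly, in __init__, so the kernel holds a constant tensor that is no longer connected to the variables when the tape later records gp.log_prob. The updated version recomputes tf.exp(...) inside nll_for_train (via the kernel property), so the transformation is recorded on the tape. A minimal sketch of the difference:

import tensorflow as tf

v = tf.Variable(0.0)

# Broken pattern: transformation evaluated eagerly, outside the tape.
amplitude_const = tf.exp(v)       # a plain constant tensor from here on
with tf.GradientTape() as tape:
    loss = amplitude_const ** 2   # never touches v under the tape
print(tape.gradient(loss, v))     # None

# Working pattern: transformation recomputed inside the tape.
with tf.GradientTape() as tape:
    loss = tf.exp(v) ** 2         # tf.exp is recorded on the tape
print(tape.gradient(loss, v))     # tf.Tensor(2.0, ...)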

I'd like to call 'on_epoch_end()' at each end of epoch. [keras custom generator]

I use Keras and tried to define a custom generator.
In the generator, I expect the function "on_epoch_end()" to be called at the end of each epoch, but "on_epoch_end()" is never called.
Could you tell me why, please?
from pathlib import Path
import math
import numpy as np                  # used below; missing from the original snippet
from sklearn.utils import shuffle   # assumed source of shuffle(); not shown in the original
from tensorflow.keras.utils import Sequence
from keras.utils import np_utils

class ImageSequence(Sequence):
    def __init__(self, x, batch_size=512):
        self.x_positive = x[0]
        self.x_negative = x[1]
        self.batch_size = batch_size

    def __getitem__(self, idx):
        hbs = self.batch_size//2
        idx_p = np.random.randint(0, self.x_positive.shape[0], hbs)
        batch_x_positive = self.x_positive[idx_p]
        #
        idx_n = np.random.randint(0, self.x_negative.shape[0], hbs)
        batch_x_negative = self.x_negative[idx_n]
        #batch_x_negative = self.x_negative[idx*hbs : (idx+1)*hbs]
        #
        batch_x = np.r_[batch_x_positive, batch_x_negative]
        #
        batch_y = np.r_[np.ones(len(batch_x_positive)), np.zeros(len(batch_x_negative))]
        return batch_x, batch_y

    def __len__(self):
        return math.ceil(2 * len(self.x_negative) / self.batch_size)

    def _shuffle(self):
        self.x_negative = shuffle(self.x_negative)

    def on_epoch_end(self):
        self._shuffle()

data_gen = ImageSequence([train_positive, train_negative], batch_size=BATCH_SIZE)

history = model.fit_generator(
    generator=data_gen,
    use_multiprocessing=True,
    validation_data=(x_valid, y_valid),
    steps_per_epoch=2 * len(train_positive) / BATCH_SIZE,
    epochs=30,
    verbose=2,
    callbacks=[])
My environment (version info):
import tensorflow.keras
print(tensorflow.keras.__version__)
2.1.6-tf
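One way to isolate the problem (a hypothetical standalone check; DummySequence is my own name) is a minimal Sequence that only reports epoch ends. Note also that mixing tensorflow.keras.utils.Sequence with a standalone-keras model can make fit_generator's isinstance check against keras.utils.Sequence fail, in which case the object is treated as a plain generator and on_epoch_end is never invoked, so keeping every import from the same package is worth trying.

import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import Sequence

# Minimal Sequence that only reports when on_epoch_end fires.
class DummySequence(Sequence):
    def __len__(self):
        return 4

    def __getitem__(self, idx):
        return np.random.rand(8, 3), np.random.randint(0, 2, 8)

    def on_epoch_end(self):
        print('on_epoch_end was called')

model = tf.keras.Sequential([tf.keras.layers.Dense(1, activation='sigmoid', input_shape=(3,))])
model.compile(optimizer='adam', loss='binary_crossentropy')
model.fit(DummySequence(), epochs=2, verbose=0)  # should print twice if on_epoch_end fires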

Keras Dense layer gets input_shape wrong

I wrote the custom layer below, and when I try to add a Dense layer after it, the Dense layer gets the input_shape wrong: it expects the shape[-1] dimension of the tensor from before my layer.
from keras import backend as K
from keras.engine.topology import Layer
from keras.layers import Conv2D, Dense, Input

class SMSO(Layer):
    def __init__(self, feature_dim=256, **kwargs):
        self.feature_dim = feature_dim
        super(SMSO, self).__init__(**kwargs)

    def build(self, input_shape):
        self.scale = self.add_weight('scale',
                                     shape=(1, self.feature_dim),
                                     initializer='ones',
                                     trainable=True)
        self.offset = self.add_weight('offset',
                                      shape=(1, self.feature_dim),
                                      initializer='zeros',
                                      trainable=True)
        super(SMSO, self).build(input_shape)

    def call(self, x):
        x = x - K.mean(x, axis=(1, 2), keepdims=True)
        x = K.square(Conv2D(self.feature_dim, 1)(x))
        x = K.sqrt(K.sum(x, axis=(1, 2)))
        return self.scale * x + self.offset

x = Input(shape=(10, 10, 32))
l1 = SMSO(16)(x)
print(l1.shape)
l2 = Dense(10)(l1)
Here is the code to reproduce the error. l1.shape gives (?, 16) as expected but the next line fails.
Adding a compute_output_shape function solves the problem.
def compute_output_shape(self, input_shape):
    return (input_shape[0], self.feature_dim)
Any layer that modifies the shape of its input needs to implement compute_output_shape so that downstream layers can infer their input shapes.
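A quick check of the fix (a sketch assuming the SMSO class above with compute_output_shape added as a method):

# With compute_output_shape defined on SMSO, Keras can infer the static
# output shape, so the following Dense layer builds without error.
x = Input(shape=(10, 10, 32))
l1 = SMSO(16)(x)    # static shape (None, 16)
l2 = Dense(10)(l1)  # now builds correctly against the 16-dim input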