Training in two parts - Keras / TensorFlow

I need your help.
My project is, broadly, the development of an image analysis algorithm for the quantification of ferritin in the sclera.
My current code mixes segmentation and regression in a single sequential model with a single output, i.e. both tasks are trained at the same time. I want the training to be done in parts: first it trains the segmentation, saves the result, and then starts the regression training.
The inputs of the segmentation are the images plus their masks; the inputs of the regression are the images plus the ferritin values.
Segmentation and regression layers are renamed because some of them share the same name when backbones are used:
from tensorflow.keras import layers, models, optimizers, losses, metrics
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger, ReduceLROnPlateau, EarlyStopping

# mylayers, mylosses, mymetrics, rename(), weighted_loss(), LossWeightsCallback,
# SegmentationModelFreezingCallback and plot_history are project-specific helpers.

def load_model(segmentation_model, regression_model, width, height, num_classes=1):
    # Rename segmentation model layers and weights
    for layer in segmentation_model.layers:
        rename(segmentation_model, layer, layer.name + '_seg')
    #for i, w in enumerate(segmentation_model.weights):
    #    split_name = w.name.split('/')
    #    new_name = split_name[0] + '_seg' + '/' + split_name[1]
    #    segmentation_model.weights[i]._handle_name = new_name

    # Rename regression model layers
    for layer in regression_model.layers:
        rename(regression_model, layer, layer.name + '_reg')
    #for i, w in enumerate(regression_model.weights):
    #    split_name = w.name.split('/')
    #    new_name = split_name[0] + '_reg' + '/' + split_name[1]
    #    regression_model.weights[i]._handle_name = new_name

    image = layers.Input(shape=(width, height, 3), name="img")
    mask_image = segmentation_model(image)
    if num_classes == 1:
        mask_image_categorical = K.cast(K.squeeze(mask_image, axis=3) + 0.5, dtype='int32')  # Threshold at 0.5
    else:
        mask_image_categorical = K.argmax(mask_image, axis=3)
    masked_layer = mylayers.CustomMasking(mask_value=0)
    masked_image = masked_layer.call([image, mask_image_categorical])
    value = regression_model(masked_image)
    m = models.Model(inputs=image, outputs=[mask_image, value])
    #m = models.Model(inputs=image, outputs=[mask_image, value, mask_image_categorical, masked_image])
    #for i, w in enumerate(m.weights): print(i, w.name)
    m.summary()
    return m
def make_train(model, regression_loss_weight, regression_loss_weight_max, train_generator, epochs, val_dataset, validation_steps, weights_path, logger_path, num_classes, focal, enable_plot, init_mode=False):
    optimizer = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    regression_loss_weight_variable = K.variable(regression_loss_weight, name='regression_loss_weight')
    mse = losses.MeanSquaredError()
    weighted_mse = weighted_loss(mse, regression_loss_weight_variable)
    weighted_mse.trainable = False
    if focal:
        if num_classes == 1:
            bce = mylosses.binary_focal_loss
            weighted_bce = weighted_loss(bce, 1 - regression_loss_weight_variable)
            loss = {'model_segmentation': weighted_bce, 'model_regression': weighted_mse}
        else:
            fce = mylosses.categorical_focal_loss
            weighted_fce = weighted_loss(fce, 1 - regression_loss_weight_variable)
            loss = {'model_segmentation': weighted_fce, 'model_regression': weighted_mse}
    else:
        if num_classes == 1:
            bce = losses.BinaryCrossentropy(from_logits=True)
            weighted_bce = weighted_loss(bce, 1 - regression_loss_weight_variable)
            loss = {'model_segmentation': weighted_bce, 'model_regression': weighted_mse}
        else:
            cce = losses.CategoricalCrossentropy()
            weighted_cce = weighted_loss(cce, 1 - regression_loss_weight_variable)
            loss = {'model_segmentation': weighted_cce, 'model_regression': weighted_mse}
    metric = metrics.BinaryAccuracy() if num_classes == 1 else metrics.CategoricalAccuracy()
    metric_array_segmentation = [metric, mymetrics.iou_score_threshold, mymetrics.f1_score_threshold]
    metric_array_regression = [metrics.RootMeanSquaredError(), metrics.MeanAbsoluteError(), metrics.MeanAbsolutePercentageError()]
    metric_dict = {'model_segmentation': metric_array_segmentation, 'model_regression': metric_array_regression}
    loss_weights = [1.0, 1.0]  # Weight for regression is taken into account in the weighted_mse loss function
    model.compile(optimizer, loss, metric_dict, loss_weights)

    loss_weight_callback = LossWeightsCallback(regression_loss_weight_variable, regression_loss_weight, regression_loss_weight_max, epochs, 'val_model_segmentation_f1-score')
    freezing_callback = SegmentationModelFreezingCallback(model, 'val_model_segmentation_f1-score', 0.95)
    checkpoint = ModelCheckpoint(weights_path, monitor='val_model_regression_root_mean_squared_error', mode='min', verbose=1, save_best_only=True)
    csv_logger = CSVLogger(logger_path, append=True, separator=';')
    lr_reducer = ReduceLROnPlateau(monitor='val_model_regression_root_mean_squared_error', mode='min', factor=0.2, patience=10, min_lr=10e-7, min_delta=0.01, verbose=1)
    earlystopping = EarlyStopping(monitor='val_model_regression_root_mean_squared_error', mode='min', verbose=1, patience=20, restore_best_weights=True)
    callbacks_list = [loss_weight_callback, freezing_callback, checkpoint, csv_logger, lr_reducer, earlystopping]

    # Test custom masking layer or global model
    #instance = train_generator[0]
    #imgs = np.squeeze(instance[0], axis=3) if instance[0].shape[3] == 1 else instance[0]
    #imsave("unmasked_img.png", imgs[0])
    #masks = np.squeeze(instance[1]['model_segmentation'], axis=3) if instance[1]['model_segmentation'].shape[3] == 1 else instance[1]['model_segmentation']
    #imsave("mask.png", masks[0] * 255)
    #masked_layer = mylayers.CustomMasking(mask_value=0)
    #masked_imgs = masked_layer.call([imgs, masks])
    #img = K.eval(masked_imgs[0,:,:,:])
    #imsave("masked_img.png", img)
    #y = model(imgs)
    #mask_image = y[0][0,:,:,:]
    #value = K.eval(y[1][0])

    if init_mode:
        instance = train_generator[0]
        model.train_on_batch(instance[0][:1], [instance[1]['model_segmentation'][:1], instance[1]['model_regression'][:1]])
    else:
        results = model.fit(train_generator, epochs=epochs, validation_data=val_dataset, validation_steps=validation_steps, callbacks=callbacks_list)
        model.save_weights(weights_path)

        # Display of metrics and loss vs epochs: metric names must match the metric functions
        if enable_plot:
            if num_classes == 1:
                plot_history(results,
                             metrics=['model_segmentation_binary_accuracy', 'val_model_segmentation_binary_accuracy', 'model_regression_root_mean_squared_error', 'val_model_regression_root_mean_squared_error', 'model_regression_mean_absolute_error', 'val_model_regression_mean_absolute_error', 'model_regression_mean_absolute_percentage_error', 'val_model_regression_mean_absolute_percentage_error'],
                             losses=['model_segmentation_loss', 'model_regression_loss', 'val_model_segmentation_loss', 'val_model_regression_loss'])
            else:
                plot_history(results,
                             metrics=['model_segmentation_categorical_accuracy', 'val_model_segmentation_categorical_accuracy', 'model_segmentation_mean_io_u', 'val_model_segmentation_mean_io_u', 'model_segmentation_f1-score', 'val_model_segmentation_f1-score', 'model_regression_root_mean_squared_error', 'val_model_regression_root_mean_squared_error', 'model_regression_mean_absolute_error', 'val_model_regression_mean_absolute_error', 'model_regression_mean_absolute_percentage_error', 'val_model_regression_mean_absolute_percentage_error'],
                             losses=['model_segmentation_loss', 'model_regression_loss', 'val_model_segmentation_loss', 'val_model_regression_loss'])
Defining the model and loading its weights is not enough when using a custom model and/or layer: the model must also be compiled and trained on a minimal batch in order to be initialized.
def init_model(model, loss_weight, loss_weight_max, train_generator, epochs, weights_path, logger_path, num_classes, focal):
    make_train(model, loss_weight, loss_weight_max, train_generator, epochs, None, None, '', logger_path, num_classes, focal, False, True)
    model.load_weights(weights_path)
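In other words, the target workflow is roughly the following two-stage procedure (a sketch only: the generator names, losses and epoch counts below are placeholders, and the output names follow the 'model_segmentation' / 'model_regression' keys used in make_train above):

# Stage 1: train the segmentation backbone alone on (image, mask) pairs and save it.
segmentation_model.compile(optimizer='adam', loss='binary_crossentropy')
segmentation_model.fit(seg_generator, epochs=seg_epochs)
segmentation_model.save_weights('segmentation_weights.h5')

# Stage 2: freeze the segmentation sub-model, rebuild the combined graph and
# train only the regression head on (image, ferritin value) pairs.
segmentation_model.trainable = False
combined = load_model(segmentation_model, regression_model, width, height, num_classes=1)
combined.compile(optimizer='adam',
                 loss={'model_regression': 'mse'},        # no loss on the segmentation output
                 metrics={'model_regression': ['mae']})
combined.fit(reg_generator, epochs=reg_epochs)            # targets: {'model_regression': ferritin}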

Related

How to apply MeanIoU to multioutput model?

I have a model based on MobileNet v2 with 2 outputs: a class (cat/dog) and face coordinates. So the "class" output has a BinaryCrossentropy loss and the "bbox" output has a YOLO-style loss (defined in the code).
The problem is, when I try to apply metrics (accuracy for class and MeanIoU for bbox):
On a random dataset, they show strange results (accuracy == 0, miou == 1 all the time).
On the real dataset (images and labels), fit() throws an error:
TypeError: '>' not supported between instances of 'NoneType' and 'int'
I suspect that I messed up the outputs and metrics somehow; it would be nice if someone with more experience in TensorFlow could take a look at it.
Here is the code (with a random dataset to reproduce):
import tensorflow as tf
# generate fake dataset
IMG_SIZE = 200
num_of_samples = 2000
images = tf.random.uniform((num_of_samples, IMG_SIZE, IMG_SIZE, 3), minval=0, maxval=1)
images = tf.data.Dataset.from_tensor_slices(images)
label_classes = tf.random.uniform((num_of_samples, 1), minval=0, maxval=2, dtype=tf.int32)
label_classes = tf.data.Dataset.from_tensor_slices(label_classes)
label_coords = tf.random.uniform((num_of_samples, 4), minval=0, maxval=1, dtype=tf.float16)
label_coords = tf.data.Dataset.from_tensor_slices(label_coords)
labels = tf.data.Dataset.zip((label_classes, label_coords))
train = tf.data.Dataset.zip((images, labels))
train = train.shuffle(num_of_samples)
train = train.batch(8)
train = train.prefetch(4)
num_of_samples = 500
images = tf.random.uniform((num_of_samples, IMG_SIZE, IMG_SIZE, 3), minval=0, maxval=1)
images = tf.data.Dataset.from_tensor_slices(images)
label_classes = tf.random.uniform((num_of_samples, 1), minval=0, maxval=2, dtype=tf.int32)
label_classes = tf.data.Dataset.from_tensor_slices(label_classes)
label_coords = tf.random.uniform((num_of_samples, 4), minval=0, maxval=1, dtype=tf.float16)
label_coords = tf.data.Dataset.from_tensor_slices(label_coords)
labels = tf.data.Dataset.zip((label_classes, label_coords))
valid = tf.data.Dataset.zip((images, labels))
valid = valid.shuffle(num_of_samples)
valid = valid.batch(8)
valid = valid.prefetch(4)
# Model with two outputs
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, GlobalMaxPooling2D
from tensorflow.keras.applications import MobileNetV2
def cnn_from_transfer():
    IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)
    input_layer = Input(shape=IMG_SHAPE)
    base_net = MobileNetV2(include_top=False, weights='imagenet')(input_layer)
    # Classification
    h1 = GlobalMaxPooling2D()(base_net)
    class1 = Dense(2048, activation='relu')(h1)
    class2 = Dense(1, activation='sigmoid')(class1)
    # Bounding box
    h2 = GlobalMaxPooling2D()(base_net)
    regress1 = Dense(2048, activation='relu')(h2)
    regress2 = Dense(4, activation='sigmoid')(regress1)
    return Model(inputs=input_layer, outputs=[class2, regress2])
model = cnn_from_transfer()
# Losses
def localization_loss(y_true, yhat):
    delta_coord = tf.reduce_sum(tf.square(y_true[:, :2] - yhat[:, :2]))
    h_true = y_true[:, 3] - y_true[:, 1]
    w_true = y_true[:, 2] - y_true[:, 0]
    h_pred = yhat[:, 3] - yhat[:, 1]
    w_pred = yhat[:, 2] - yhat[:, 0]
    delta_size = tf.reduce_sum(tf.square(w_true - w_pred) + tf.square(h_true - h_pred))
    return delta_coord + delta_size
classloss = tf.keras.losses.BinaryCrossentropy()
regressloss = localization_loss
# Train
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001, decay=0.00001),
    loss=[classloss, regressloss],
    metrics=[[tf.keras.metrics.Accuracy()], [tf.keras.metrics.MeanIoU(num_classes=2)]],
)
history = model.fit(train, epochs=5, validation_data=valid)
What am I doing wrong?
As M.Innat said in the comments, tf.keras.metrics.MeanIoU is not applicable to my case (bboxes), so I need to make my own custom metric (the iou_metric function below). Moreover, I found out that BinaryAccuracy should be used instead of the regular Accuracy. The rest is fine.
Here is the corrected code:
def iou_metric(y_true, y_pred):
    # Boxes are (x_min, y_min, x_max, y_max); areas of ground truth and prediction.
    aog = tf.abs(tf.transpose(y_true)[2] - tf.transpose(y_true)[0] + 1) * tf.abs(tf.transpose(y_true)[3] - tf.transpose(y_true)[1] + 1)
    aop = tf.abs(tf.transpose(y_pred)[2] - tf.transpose(y_pred)[0] + 1) * tf.abs(tf.transpose(y_pred)[3] - tf.transpose(y_pred)[1] + 1)
    overlap_0 = tf.maximum(tf.transpose(y_true)[0], tf.transpose(y_pred)[0])
    overlap_1 = tf.maximum(tf.transpose(y_true)[1], tf.transpose(y_pred)[1])
    overlap_2 = tf.minimum(tf.transpose(y_true)[2], tf.transpose(y_pred)[2])
    overlap_3 = tf.minimum(tf.transpose(y_true)[3], tf.transpose(y_pred)[3])
    intersection = (overlap_2 - overlap_0 + 1) * (overlap_3 - overlap_1 + 1)
    union = aog + aop - intersection
    iou = intersection / union
    iou = tf.keras.backend.clip(iou, 0.0 + tf.keras.backend.epsilon(), 1.0 - tf.keras.backend.epsilon())
    return iou

model.compile(
    optimizer=opt,  # opt: the optimizer instance (e.g. the Adam from the first compile call)
    loss=[classloss, regressloss],
    metrics=[[tf.keras.metrics.BinaryAccuracy()], [iou_metric]],
)
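As a quick sanity check of the metric outside of fit() (a small sketch; it assumes eager execution and the (x_min, y_min, x_max, y_max) box format implied by the indexing above):

import tensorflow as tf

y_true = tf.constant([[0., 0., 10., 10.]])
y_pred_same = tf.constant([[0., 0., 10., 10.]])
y_pred_shift = tf.constant([[5., 5., 15., 15.]])

print(iou_metric(y_true, y_pred_same).numpy())   # ~1.0 (clipped by epsilon)
print(iou_metric(y_true, y_pred_shift).numpy())  # between 0 and 1 for a partial overlap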

Keras Model works w/ 3 inputs but not 4

I'm trying to build a VAE for some time series data, but am having a hard time getting the model to work with 4 inputs instead of 3, and I'm not sure what's causing the problem.
Here's the complete code that I have:
# data for each time series
import yfinance as yf
import tensorflow as tf
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from tensorflow.keras import layers
# load in the data
msft = yf.Ticker('MSFT').history(period = '5y')[['Close']]
googl = yf.Ticker('GOOGL').history(period = '5y')[['Close']]
amzn = yf.Ticker('AMZN').history(period = '5y')[['Close']]
vals = np.sin(np.linspace(-100, 100, msft.shape[0]))[:, None]
# scale the data for numeric stability
msft = StandardScaler().fit_transform(msft)
googl = StandardScaler().fit_transform(googl)
amzn = StandardScaler().fit_transform(amzn)
# global variables
latent_dim = 2
batch_size = 32
sequence_length = 30
# build time series samplers for each time series
c1 = keras.utils.timeseries_dataset_from_array(
    msft,
    targets=None,
    sequence_length=sequence_length
)
c2 = keras.utils.timeseries_dataset_from_array(
    googl,
    targets=None,
    sequence_length=sequence_length
)
c3 = keras.utils.timeseries_dataset_from_array(
    amzn,
    targets=None,
    sequence_length=sequence_length
)
c4 = keras.utils.timeseries_dataset_from_array(
    vals,
    targets=None,
    sequence_length=sequence_length
)
# add the encoder for the sine wave
sin_inputs = keras.layers.Input(shape=(sequence_length, 1))
# stack two lstm layers
sx = layers.LSTM(64, return_sequences = True)(sin_inputs)
sx = layers.LSTM(64)(sx)
# build the encoders for each of the separate time series
msft_inputs = layers.Input(shape=(sequence_length, 1))
# stack two lstm layers
mx = layers.LSTM(64, return_sequences = True)(msft_inputs)
mx = layers.LSTM(64)(mx)
# now for google
googl_inputs = layers.Input(shape=(sequence_length, 1))
gx = layers.LSTM(64, return_sequences = True)(googl_inputs)
gx = layers.LSTM(64)(gx)
# and for amazon
amzn_inputs = layers.Input(shape = (sequence_length, 1))
ax = layers.LSTM(64, return_sequences = True)(amzn_inputs)
ax = layers.LSTM(64)(ax)
# now combine them together for a single joint time series!
x = layers.Concatenate()([mx, gx, ax, sx])
# pass into a dense layer
x = layers.Dense(64, activation = 'relu')(x)
# and finally pass them into the final decoder!
z_mean = layers.Dense(latent_dim, name = 'z_mean')(x)
z_logvar = layers.Dense(latent_dim, name = 'z_logvar')(x)
encoder = keras.Model([msft_inputs, googl_inputs, amzn_inputs, sin_inputs], [z_mean, z_logvar], name = 'encoder')
class Sampler(layers.Layer):
    def call(self, z_mean, z_logvar):
        batch_size = tf.shape(z_mean)[0]
        n_dims = tf.shape(z_mean)[1]
        epsilon = tf.random.normal(shape=(batch_size, n_dims))
        return z_mean + tf.exp(0.5 * z_logvar) * epsilon
latent_inputs = keras.Input(shape=(latent_dim,))
dec = layers.RepeatVector(sequence_length)(latent_inputs)
dec = layers.LSTM(64, return_sequences=True)(dec)
out = layers.TimeDistributed(layers.Dense(1))(dec)
decoder = keras.Model(latent_inputs, out)
class VAE(keras.Model):
    def __init__(self, encoder, decoder, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.sampler = Sampler()
        self.total_loss_tracker = keras.metrics.Mean(name='total_loss')
        self.reconstruction_loss_tracker = keras.metrics.Mean(name='reconstruction_loss')
        self.kl_loss_tracker = keras.metrics.Mean(name='kl_loss')

    @property
    def metrics(self):
        return [self.total_loss_tracker,
                self.reconstruction_loss_tracker,
                self.kl_loss_tracker]

    def train_step(self, data):
        with tf.GradientTape() as tape:
            z_mean, z_logvar = self.encoder(data)
            z = self.sampler(z_mean, z_logvar)
            reconstruction = self.decoder(z)
            reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(
                    keras.losses.binary_crossentropy(data, reconstruction),
                    axis=(1, 2)
                )
            )
            kl_loss = -0.5 * (1 + z_logvar - tf.square(z_mean) - tf.exp(z_logvar))
            total_loss = reconstruction_loss + tf.reduce_mean(kl_loss)
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "total_loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

vae = VAE(encoder, decoder)
vae.compile(optimizer=keras.optimizers.Adam(), run_eagerly=False)
vae.fit(zip(c1.repeat(), c2.repeat(), c3.repeat(), c4.repeat()), epochs=10, steps_per_epoch=10)
When I fit this model I get the following error:
ValueError: Data is expected to be in format `x`, `(x,)`, `(x, y)`, or `(x, y, sample_weight)`, found: (<tf.Tensor: shape=(128, 30, 1),
My issue is that this exact same model works when I only have 3 inputs instead of 4.
If I replace the lines where I specify the inputs everything seems to work fine:
x = layers.Concatenate()([mx, gx, sx])
encoder = keras.Model([msft_inputs, googl_inputs, amzn_inputs], [z_mean, z_logvar], name = 'encoder')
vae.fit(zip(c1.repeat(), c2.repeat(), c3.repeat()), epochs = 10, steps_per_epoch = 10)
So I'm curious about what it is about my setup that is causing my model to break when I add the fourth input.
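One possible explanation (an assumption, not something confirmed in the thread) is that a plain Python zip of three datasets happens to look like Keras's accepted (x, y, sample_weight) tuple, while a 4-tuple matches none of the formats listed in the error message. Nesting the four window datasets inside a single element with tf.data.Dataset.zip keeps them all in the x slot, e.g.:

# Sketch: one dataset whose elements are ((w1, w2, w3, w4),), i.e. a single x
# made of the four windows, so the custom train_step receives the 4-tuple as data.
train_ds = tf.data.Dataset.zip(((c1.repeat(), c2.repeat(), c3.repeat(), c4.repeat()),))
vae.fit(train_ds, epochs=10, steps_per_epoch=10)

Note that this only addresses the data-format error; the reconstruction loss in train_step still compares a single decoder output against data, which would need its own adjustment for four inputs.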

How to build a Siamese Network from Transformer Model? Shape Input Error

I have the following base network with some important parameters (the error comes from these; please assume every other parameter is defined):
maxlen = 250
model_dense = 256
Base model:
def build_base_model():
    inputs = layers.Input(shape=(maxlen,), name='base_input')
    embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
    x = embedding_layer(inputs)
    transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim, trans_drop1, trans_drop2, trans_reg1, trans_reg2)
    x = transformer_block(x)
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dropout(model_drop1)(x)
    outputs = layers.Dense(model_dense)(x)
    base_model = keras.Model(inputs=inputs, outputs=outputs)
    return base_model
and my Siamese network as:
base_model = build_base_model()
input_text1 = layers.Input(shape=(maxlen,))
input_text2 = layers.Input(shape=(maxlen,))
emb1 = base_model(input_text1)
emb2 = base_model(input_text2)
distance = layers.Lambda(euclidean_distance)([emb1, emb2])
outputs = layers.Dense(1, activation="sigmoid")(distance)
model = keras.Model(inputs=[emb1, emb2], outputs=outputs)
model.compile(
    optimizer="adam", metrics=["accuracy"], loss='binary_crossentropy')
history = model.fit(
    train_X, train_y, batch_size=batch_size, epochs=50, validation_split=0.15, callbacks=callbacks, verbose=1,
)
It gives me the following error:
ValueError: Input 0 of layer "model_11" is incompatible with the layer: expected shape=(None, 256), found shape=(None, 250)
What am I doing wrong?
Base Transformer model tutorial taken from this
Siamese Model Structure, cosine distance, make_pairs from this
UPDATE: I have built the network in a different manner and it is now up and running. Can someone please confirm whether it is the correct approach:
inputs1 = layers.Input(shape=(maxlen,),name='inp_1')
inputs2 = layers.Input(shape=(maxlen,),name='inp_2')
embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim, trans_drop1, trans_drop2, trans_reg1, trans_reg2)
pooling = layers.GlobalAveragePooling1D()
drop_layer = layers.Dropout(model_drop1)
out_dense = layers.Dense(model_dense)
x1 = embedding_layer(inputs1)
x2 = embedding_layer(inputs2)
x1 = transformer_block(x1)
x2 = transformer_block(x2)
x1 = pooling(x1)
x2 = pooling(x2)
x1 = drop_layer(x1)
x2 = drop_layer(x2)
vec_x1 = out_dense(x1)
vec_x2 = out_dense(x2)
distance = layers.Lambda(euclidean_distance)([vec_x1, vec_x2])
outputs = layers.Dense(1, activation="sigmoid")(distance)
model = keras.Model(inputs=[inputs1, inputs2], outputs=outputs)
In the line model = keras.Model(inputs=[emb1, emb2], outputs=outputs), I suspect that you meant to write model = keras.Model(inputs=[input_text1, input_text2], outputs=outputs).
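Applied to the original snippet, the corrected wiring would look like this (a sketch; everything else stays as posted):

emb1 = base_model(input_text1)
emb2 = base_model(input_text2)
distance = layers.Lambda(euclidean_distance)([emb1, emb2])
outputs = layers.Dense(1, activation="sigmoid")(distance)
# The model's inputs must be the Input tensors, not the embedding tensors.
model = keras.Model(inputs=[input_text1, input_text2], outputs=outputs)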

How to put a Max-Min constraint on a hidden Dense Layer?

I am trying to understand how to put a constraint on a dense hidden layer in a model like the following one.
I would like to develop the function applyConstraint, which rescales the values contained in params between the values min and max.
How would you do it?
inp = tfl.Input((10,))
dense = tfl.Dense(16, activation = 'relu')(inp)
dense = tfl.Dense(8, activation = 'relu')(dense)
params = tfl.Dense(3, activation = 'relu')(dense)
params_max_min = applyConstraint(params, min, max)
concat = tfl.Concatenate()([dense, params])
dense = tfl.Dense(16, activation = 'relu')(concat)
dense = tfl.Dense(8, activation = 'relu')(dense)
dense = tfl.Dense(1, activation = None)(dense)
model = tf.keras.Model(inputs = inp, outputs = dense)
model_params = tf.keras.Model(inputs = inp, outputs = params_max_min)
model.compile(optimizer = 'adam', loss = 'mse')
You can subclass tf.keras.constraints.Constraint and customize your own op to do what you want.
Define constraint:
import numpy as np
import tensorflow as tf
from tensorflow.keras.constraints import Constraint
class MinMaxConstraint(Constraint):
    """Constrain model weights between [x_min, x_max]."""

    def __init__(self, x_min=0.0, x_max=1.0):
        super().__init__()
        self.x_min = x_min
        self.x_max = x_max
        # TODO: add sanity check if x_max == x_min or w_max == w_min

    def __call__(self, w):
        w_min = tf.minimum(tf.math.reduce_min(w), self.x_min)
        w_max = tf.maximum(tf.math.reduce_max(w), self.x_max)
        scale = (self.x_max - self.x_min) / (w_max - w_min)
        m = self.x_min - w_min * scale
        w = w * scale
        return w + m
Test on default case:
# random data
X = tf.random.normal([10, 2])
y = tf.random.normal([10])
# optimizer
m_opt = tf.keras.optimizers.Adam(1e-3)
# network definition
x_in = tf.keras.Input([2])
x = tf.keras.layers.Dense(4, kernel_constraint=MinMaxConstraint())(x_in)
x_out = tf.keras.layers.Dense(1)(x)
# model definition
model = tf.keras.models.Model(x_in, x_out)
# do a forward pass and update
with tf.GradientTape() as tape:
    y_hat = model(X)
    loss = tf.math.reduce_mean(tf.losses.MSE(y, y_hat))
m_vars = model.trainable_variables
m_grads = tape.gradient(loss, m_vars)
m_opt.apply_gradients(zip(m_grads, m_vars))
# check weights
assert np.all(model.get_weights()[0] >= 0.0)
assert np.all(model.get_weights()[0] <= 1.0)
# passes!
Test on [-2, 2]:
# reset network
x_in = tf.keras.Input([2])
x = tf.keras.layers.Dense(4, kernel_constraint=MinMaxConstraint(-2.0, 2.0))(x_in)
x_out = tf.keras.layers.Dense(1)(x)
# reset model
model = tf.keras.models.Model(x_in, x_out)
# do a forward pass and update
with tf.GradientTape() as tape:
    y_hat = model(X)
    loss = tf.math.reduce_mean(tf.losses.MSE(y, y_hat))
m_vars = model.trainable_variables
m_grads = tape.gradient(loss, m_vars)
m_opt.apply_gradients(zip(m_grads, m_vars))
# check weights again
assert np.all(model.get_weights()[0] >= -2.0)
assert np.all(model.get_weights()[0] <= 2.0)
# passes!
Try this:
import tensorflow as tf
params = tf.random.uniform((2, 3))
min, max = 4., 5.
def applyConstraint(params, min, max):
    mn = tf.reduce_min(params)
    mx = tf.reduce_max(params)
    mult = (max - min) / (mx - mn)
    p = min + (params - mn) * mult
    return p
output = applyConstraint(params, min, max)
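To apply this inside the model from the question rather than on a standalone tensor, the function can be wrapped in a Lambda layer so the rescaling becomes part of the forward pass. A small sketch, assuming tfl is tensorflow.keras.layers and applyConstraint from above is in scope:

import tensorflow as tf
from tensorflow.keras import layers as tfl

inp = tfl.Input((10,))
dense = tfl.Dense(16, activation='relu')(inp)
dense = tfl.Dense(8, activation='relu')(dense)
params = tfl.Dense(3, activation='relu')(dense)

# Rescale the layer's activations into [4, 5] as part of the graph.
params_max_min = tfl.Lambda(lambda p: applyConstraint(p, 4., 5.))(params)

model_params = tf.keras.Model(inputs=inp, outputs=params_max_min)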

Tensorflow RNN: Perplexity per Epoch remains constant

I am training an RNN-based language model using TensorFlow. The model is very similar to the PTB model example in the TF tutorials section. However, when I attempt to train the model on my own data, the perplexity of the model does not go down; it remains constant throughout multiple epochs. Could anyone let me know what I might be doing wrong?
I have a feeling that I am not handling the targets properly, but the gist of my code for the targets is:
def batcher(batch_size, unroll_steps, data, pad):
    print(len(data))
    batches = len(data) // batch_size
    inp = []
    target = []
    for i in range(batches):
        #print(len(data[i*batch_size:(i+1)*batch_size]))
        x = data[i*batch_size:(i+1)*batch_size]
        y = [line[1:] + [pad] for line in x]
        yield (x, y)
That is, I just shift the data by 1 and use that as the target for the next word in a sentence.
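For example, with a pad id of 0 (a toy illustration of that shift):

x = [[5, 7, 9]]
pad = 0
y = [line[1:] + [pad] for line in x]
print(y)  # [[7, 9, 0]]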
The training script and model (class) are seen below
Training script (excerpt):
def train(session, model, folder, batch_size, unroll_steps, epoch):
    word_to_id, id_to_word, train, val = build_inputs(folder, unroll_steps)
    pad = word_to_id['<pad>']
    costs = 0
    iters = 0
    train_size = len(train)
    batch_size = model.batch_size
    batches = train_size // batch_size
    state = session.run(model._initial_state)
    print("Running epoch %d" % epoch)
    for i in range(batches):
        fetches = [model.cost, model._final_state, model.logits]
        feed_dict = {}
        x = train[i*batch_size:(i+1)*batch_size]
        y = [line[1:] + [pad] for line in x]
        feed_dict[model.input] = x
        feed_dict[model.targets] = y
        feed_dict[model._initial_state] = state
        #print("Cell-state complete - Running")
        cost, state, logits = session.run(fetches, feed_dict)
        #print("Single Run complete")
        costs += cost
        iters += model.unroll_steps
    print("\tEpoch %d: Perplexity is %f" % (epoch, np.exp(costs/iters)))
    return np.exp(costs/iters)
Model:
import tensorflow as tf

class LM(object):
    def __init__(self, train, max_gradient, batch_size, unroll_steps, vocab, size, layers, learning_rate, init, prob):
        self.batch_size = batch_size
        self.max_gradient = max_gradient
        self.layers = layers
        self.learning_rate = learning_rate
        self.unroll_steps = unroll_steps
        self.init = init

        #with tf.name_scope("Parameters"):

        with tf.device('/gpu:0'), tf.name_scope("Input"):
            self.input = tf.placeholder(tf.int64, shape=[batch_size, unroll_steps], name="input")
            self.targets = tf.placeholder(tf.int64, shape=[batch_size, unroll_steps], name="targets")
            #self.init = tf.placeholder(tf.float32, shape=[], name="init")

        with tf.device('/gpu:0'), tf.name_scope("Embedding"):
            embedding = tf.Variable(tf.random_uniform([vocab, size], -self.init, self.init), dtype=tf.float32, name="embedding")
            embedded_input = tf.nn.embedding_lookup(embedding, self.input, name="embedded_input")

        with tf.device('/gpu:0'), tf.name_scope("RNN"), tf.variable_scope(tf.get_variable_scope(), reuse=False) as scope:
            lstm_cell = tf.contrib.rnn.BasicLSTMCell(size, forget_bias=0.0, state_is_tuple=True)
            if train and prob < 1.0:
                lstm_cell = tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=prob)
            cell = tf.contrib.rnn.MultiRNNCell([lstm_cell for _ in range(layers)], state_is_tuple=True)
            self._initial_state = cell.zero_state(batch_size, tf.float32)
            outputs = []
            state = self._initial_state
            for step in range(unroll_steps):
                if step > 0: tf.get_variable_scope().reuse_variables()
                (cell_output, state) = cell(embedded_input[:, step, :], state)
                outputs.append(cell_output)

        with tf.device('/gpu:0'), tf.name_scope("Cost"), tf.variable_scope(tf.get_variable_scope(), reuse=False) as scope:
            output = tf.reshape(tf.concat(outputs, 1), [-1, size])
            softmax_w = tf.get_variable("softmax_w", [size, vocab], dtype=tf.float32)
            softmax_b = tf.get_variable("softmax_b", [vocab], dtype=tf.float32)
            logits = tf.matmul(output, softmax_w) + softmax_b
            losses = []
            for logit, target in zip([logits], [tf.reshape(self.targets, [-1])]):
                target = tf.reshape(target, [-1])
                loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logit, labels=target)
                losses.append(loss)
            self.cost = tf.reduce_sum(losses) / batch_size
            self._final_state = state
            self.logits = logits
            scope.reuse_variables()

        if not train:
            return

        with tf.device('/gpu:0'), tf.name_scope("Train"), tf.variable_scope(tf.get_variable_scope(), reuse=False):
            train_variables = tf.trainable_variables()
            gradients, _ = tf.clip_by_global_norm(tf.gradients(self.cost, train_variables), self.max_gradient)
            optimizer = tf.train.AdamOptimizer(self.learning_rate)
            self.training = optimizer.apply_gradients(zip(gradients, train_variables))
            tf.get_variable_scope().reuse_variables()
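One thing worth double-checking (an assumption based only on the excerpt above, not a confirmed answer): the training loop fetches model.cost, model._final_state and model.logits but never runs the optimizer op model.training, in which case the weights are never updated and the perplexity stays constant. A minimal sketch of the change inside the batch loop:

# Also fetch the apply_gradients op so each session.run performs an update.
fetches = [model.cost, model._final_state, model.logits, model.training]
cost, state, logits, _ = session.run(fetches, feed_dict)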