The truth value of an array with more than one element is ambiguous: Reinforcement Learning Agent (TensorFlow 2.0)

I'm starting out with DQN, creating an agent to run in a custom environment:
class ReplayBuffer:
    def __init__(self, capacity=10000):
        self.buffer = deque(maxlen=capacity)

    def store(self, state, action, reward, next_state, done):
        self.buffer.append([state, action, reward, next_state, done])

    def sample(self):
        sample = random.sample(self.buffer, args.batch_size)
        states, actions, rewards, next_states, done = map(np.asarray, zip(*sample))
        states = np.array(states).reshape(args.batch_size, -1)
        next_states = np.array(next_states).reshape(args.batch_size, -1)
        return states, actions, rewards, next_states, done

    def size(self):
        return len(self.buffer)
class DQN:
    def __init__(self, state_img, state_legal_actions,
                 state_price, state_risk_reward,
                 action_dim, state_dim):
        self.state_img = state_img
        self.state_legal_actions = state_legal_actions
        self.state_price = state_price
        self.state_risk_reward = state_risk_reward
        self.action_dim = action_dim
        self.state_dim = state_dim
        self.epsilon = args.eps

        # Define the input layers
        self.df_img_input = self.state_img.shape
        self.rr_input = self.state_risk_reward.shape
        self.legal_actions_input = self.state_legal_actions.shape
        self.price_input = self.state_price.shape

        self.model = self.nn_model()

    def nn_model(self):
        # Create input layers
        input_img = Input(shape=self.df_img_input)
        input_numeric1 = Input(shape=self.price_input)
        input_numeric2 = Input(shape=self.legal_actions_input)
        input_numeric3 = Input(shape=self.rr_input)

        # Create the CNN for the image input
        cnn = Conv2D(64, (3, 3), activation='relu', padding='same')(input_img)
        cnn = MaxPooling2D()(cnn)
        cnn = Dropout(0.25)(cnn)
        cnn = Conv2D(128, (3, 3), activation='relu', padding='same')(input_img)
        cnn = MaxPooling2D()(cnn)
        cnn = Dropout(0.25)(cnn)
        cnn = Conv2D(128, (3, 3), activation='relu', padding='same')(input_img)
        cnn = Dropout(0.2)(cnn)
        cnn = Flatten()(cnn)

        # Create the DNN for the numeric inputs
        dnn1 = Dense(64, activation='relu')(input_numeric1)
        dnn1 = Dropout(0.25)(dnn1)
        dnn2 = Dense(32, activation='relu')(input_numeric2)
        dnn2 = Dropout(0.25)(dnn2)
        dnn3 = Dense(32, activation='relu')(input_numeric3)
        dnn3 = Dropout(0.25)(dnn3)

        # Concatenate the outputs from the CNN and DNN
        concat = Concatenate()([cnn, dnn1, dnn2, dnn3])

        # Create the output layers
        dense1 = Dense(128, activation='relu')(concat)
        dense1 = Dropout(0.25)(dense1)
        dense2 = Dense(128, activation='relu')(dense1)
        dense2 = Dropout(0.25)(dense2)
        dense3 = Dense(self.action_dim, activation='sigmoid')(dense2)

        # Create the model
        model = Model(inputs=[input_img, input_numeric1, input_numeric2, input_numeric3], outputs=dense3)

        # Compile the model
        model.compile(optimizer='adam',
                      loss=tf.keras.losses.MeanSquaredError(),
                      metrics=['accuracy'])
        return model

    def predict(self, input_img, input_numeric1, input_numeric2, input_numeric3):
        return self.model.predict(input_img, input_numeric1, input_numeric2, input_numeric3)

    def get_action(self, state):
        state = [state['df_img'], state['price'], state['legal_actions'], state['risk_reward']]
        self.epsilon *= args.eps_decay
        self.epsilon = max(self.epsilon, args.eps_min)
        q_value = self.predict(state[0], state[1], state[2], state[3])
        if np.random.random() < self.epsilon:
            return random.randint(0, self.action_dim - 1)
        return np.argmax(q_value)

    def train(self, states, targets):
        self.model.fit(states, targets, epochs=2)
class Agent:
    def __init__(self, env):
        self.env = env
        self.obs_dim = ['df_img', 'legal_actions', 'price', 'risk_reward']
        self.state_img = self.env.observation_space[self.obs_dim[0]]
        self.state_legal_actions = self.env.observation_space[self.obs_dim[1]]
        self.state_price = self.env.observation_space[self.obs_dim[2]]
        self.state_risk_reward = self.env.observation_space[self.obs_dim[3]]
        self.state_dim = np.hstack((self.state_img, self.state_risk_reward,
                                    self.state_price, self.state_legal_actions))
        self.action_dim = self.env.action_space.n
        self.model = DQN(self.state_img, self.state_legal_actions,
                         self.state_price, self.state_risk_reward,
                         self.action_dim, self.state_dim)
        self.target_model = DQN(self.state_img, self.state_legal_actions,
                                self.state_price, self.state_risk_reward,
                                self.action_dim, self.state_dim)
        self.update_target()
        self.buffer = ReplayBuffer()

    def update_target(self):
        weights = self.model.model.get_weights()
        self.target_model.model.set_weights(weights)

    def replay_experience(self):
        for _ in range(10):
            states, actions, rewards, next_states, done = self.buffer.sample()
            targets = self.model.predict(states)
            next_q_values = self.target_model.predict(next_states).max(axis=1)
            targets[range(args.batch_size), actions] = (
                rewards + (1 - done) * next_q_values * args.gamma
            )
            self.model.train(states, targets)

    def train(self, max_episodes=1000):
        for ep in range(max_episodes):
            done, episode_reward = False, 0
            observation = self.env.reset()
            #print(observation[0])
            while not done:
                action = self.model.get_action(observation)
                next_observation, reward, done, _ = self.env.step(action)
                self.buffer.store(
                    observation, action, reward * 0.01, next_observation, done
                )
                episode_reward += reward
                observation = next_observation
            if self.buffer.size() >= args.batch_size:
                self.replay_experience()
            self.update_target()
            print(f"Episode#{ep} Reward:{episode_reward}")
            tf.summary.scalar("episode_reward", episode_reward, step=ep)
but when I run it with:
if __name__ == "__main__":
    env = TradingEnv(df_)
    agent = Agent(env)
    agent.train(max_episodes=2)
I receive the following error:
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
/tmp/ipykernel_3672/1951022009.py in <module>
      2 env = ForexTradingEnv(df_)
      3 agent = Agent(env)
----> 4 agent.train(max_episodes=2) # Increase max_episodes value
      5

/tmp/ipykernel_3672/3112839645.py in train(self, max_episodes)
     43
     44         while not done:
---> 45             action = self.model.get_action(observation)
     46             next_observation, reward, done, _ = self.env.step(action)
     47             self.buffer.store(

/tmp/ipykernel_3672/2209466094.py in get_action(self, state)
     78         self.epsilon *= args.eps_decay
     79         self.epsilon = max(self.epsilon, args.eps_min)
---> 80         q_value = self.predict(state[0],state[1],state[2],state[3])
     81         if np.random.random() < self.epsilon:
     82             return random.randint(0, self.action_dim - 1)

/tmp/ipykernel_3672/2209466094.py in predict(self, input_img, input_numeric1, input_numeric2, input_numeric3)
     71
     72     def predict(self, input_img,input_numeric1,input_numeric2, input_numeric3):
---> 73         return self.model.predict(input_img,input_numeric1,input_numeric2, input_numeric3)
     74
     75     def get_action(self, state):

~/miniconda3/envs/tf/lib/python3.9/site-packages/keras/utils/traceback_utils.py in error_handler(*args, **kwargs)
     68     # To get the full stack trace, call:
     69     # tf.debugging.disable_traceback_filtering()
---> 70     raise e.with_traceback(filtered_tb) from None
     71     finally:
     72         del filtered_tb

~/miniconda3/envs/tf/lib/python3.9/site-packages/keras/engine/data_adapter.py in __init__(self, x, y, sample_weights, sample_weight_modes, batch_size, epochs, steps, shuffle, **kwargs)
    262     # If batch_size is not passed but steps is, calculate from the input
    263     # data. Default to 32 for backwards compat.
--> 264     if not batch_size:
    265         batch_size = int(math.ceil(num_samples / steps)) if steps else 32
    266

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
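The filtered traceback ends in Keras's data adapter at `if not batch_size:`, which hints at what went wrong: `Model.predict(x, batch_size=None, ...)` accepts a single `x`, so the three extra positional arrays are consumed as `batch_size` and the following positional parameters, and testing a NumPy array for truthiness raises exactly this ValueError. A multi-input model expects `x` to be a list of arrays, one per `Input` layer, so the likely fix is a sketch along these lines (assuming each array already carries a leading batch dimension):

def predict(self, input_img, input_numeric1, input_numeric2, input_numeric3):
    # Pass ONE `x` argument: a list of arrays, one per Input layer,
    # in the same order as `Model(inputs=[...])` was declared.
    return self.model.predict(
        [input_img, input_numeric1, input_numeric2, input_numeric3]
    )

If `get_action` is called with a single observation, each array may also need `np.expand_dims(arr, axis=0)` first, since `predict` operates on batches.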
If you think you can help here, that would be really nice!
Thanks in advance,
best regards

Related

How to fix the issue of Input has undefined rank in TensorFlow?

I am trying to define a custom DenseNet, but I am getting a weird error and do not understand why. The code is as follows:
def densenet(input_shape, n_classes, filters=32):
    # batch norm + relu + conv
    def bn_rl_conv(x, filters, kernel=1, strides=1):
        x = BatchNormalization()(x)
        x = ReLU()(x)
        x = Conv2D(filters, kernel, strides=strides, padding='same')(x)
        return x

    def dense_block(x, repetition):
        for _ in range(repetition):
            y = bn_rl_conv(x, 4 * filters)
            y = bn_rl_conv(y, filters, 3)
            x = concatenate([y, x])
        return x

    def transition_layer(x):
        x = bn_rl_conv(x, K.int_shape(x)[-1] // 2)
        x = AvgPool2D(2, strides=2, padding='same')(x)
        return x

    inp = Input (input_shape)
    x = Conv2D(64, 7, strides=2, padding='same')(inp)
    x = MaxPool2D(3, strides=2, padding='same')(x)
    for repetition in [2, 4, 6, 4]:
        d = dense_block(x, repetition)
        x = transition_layer(d)
    x = GlobalAveragePooling2D()(x)
    output = Dense(n_classes, activation='softmax')(x)
    model = Model(inp, output)
    return model

input_shape = (1024, 2, 1)
num_classes = 24
model = densenet(input_shape, num_classes)
The error states the following:
/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py in error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
/usr/local/lib/python3.7/dist-packages/keras/layers/normalization/batch_normalization.py in build(self, input_shape)
296 if not input_shape.ndims:
297 raise ValueError(
--> 298 f'Input has undefined rank. Received: input_shape={input_shape}.')
299 ndims = len(input_shape)
300
ValueError: Input has undefined rank. Received: input_shape=<unknown>.
Why am I getting this error? I have already indicated the input shape. How can I fix this issue?
You are calling the Input layer incorrectly: you're passing input_shape to the __call__() method instead of to the shape parameter.
Change:
inp = Input (input_shape)
To:
inp = Input(shape=input_shape)

Keras Model works w/ 3 inputs but not 4

I'm trying to build a VAE for some time series data, but am having a hard time getting the model to work with 4 inputs instead of 3, and I'm not sure what's causing the problem.
Here's the complete code that I have:
# data for each time series
import yfinance as yf
import tensorflow as tf
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from tensorflow.keras import layers

# load in the data
msft = yf.Ticker('MSFT').history(period='5y')[['Close']]
googl = yf.Ticker('GOOGL').history(period='5y')[['Close']]
amzn = yf.Ticker('AMZN').history(period='5y')[['Close']]
vals = np.sin(np.linspace(-100, 100, msft.shape[0]))[:, None]

# scale the data for numeric stability
msft = StandardScaler().fit_transform(msft)
googl = StandardScaler().fit_transform(googl)
amzn = StandardScaler().fit_transform(amzn)

# global variables
latent_dim = 2
batch_size = 32
sequence_length = 30

# build time series samplers for each time series
c1 = keras.utils.timeseries_dataset_from_array(
    msft,
    targets=None,
    sequence_length=sequence_length
)
c2 = keras.utils.timeseries_dataset_from_array(
    googl,
    targets=None,
    sequence_length=sequence_length
)
c3 = keras.utils.timeseries_dataset_from_array(
    amzn,
    targets=None,
    sequence_length=sequence_length
)
c4 = keras.utils.timeseries_dataset_from_array(
    vals,
    targets=None,
    sequence_length=sequence_length
)

# add the encoder for the sine wave
sin_inputs = keras.layers.Input(shape=(sequence_length, 1))
# stack two lstm layers
sx = layers.LSTM(64, return_sequences=True)(sin_inputs)
sx = layers.LSTM(64)(sx)

# build the encoders for each of the separate time series
msft_inputs = layers.Input(shape=(sequence_length, 1))
# stack two lstm layers
mx = layers.LSTM(64, return_sequences=True)(msft_inputs)
mx = layers.LSTM(64)(mx)

# now for google
googl_inputs = layers.Input(shape=(sequence_length, 1))
gx = layers.LSTM(64, return_sequences=True)(googl_inputs)
gx = layers.LSTM(64)(gx)

# and for amazon
amzn_inputs = layers.Input(shape=(sequence_length, 1))
ax = layers.LSTM(64, return_sequences=True)(amzn_inputs)
ax = layers.LSTM(64)(ax)

# now combine them together for a single joint time series!
x = layers.Concatenate()([mx, gx, ax, sx])

# pass into a dense layer
x = layers.Dense(64, activation='relu')(x)

# and finally pass them into the final decoder!
z_mean = layers.Dense(latent_dim, name='z_mean')(x)
z_logvar = layers.Dense(latent_dim, name='z_logvar')(x)
encoder = keras.Model([msft_inputs, googl_inputs, amzn_inputs, sin_inputs], [z_mean, z_logvar], name='encoder')

class Sampler(layers.Layer):
    def call(self, z_mean, z_logvar):
        batch_size = tf.shape(z_mean)[0]
        n_dims = tf.shape(z_mean)[1]
        epsilon = tf.random.normal(shape=(batch_size, n_dims))
        return z_mean + tf.exp(0.5 * z_logvar) * epsilon

latent_inputs = keras.Input(shape=(latent_dim,))
dec = layers.RepeatVector(sequence_length)(latent_inputs)
dec = layers.LSTM(64, return_sequences=True)(dec)
out = layers.TimeDistributed(layers.Dense(1))(dec)
decoder = keras.Model(latent_inputs, out)

class VAE(keras.Model):
    def __init__(self, encoder, decoder, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.sampler = Sampler()
        self.total_loss_tracker = keras.metrics.Mean(name='total_loss')
        self.reconstruction_loss_tracker = keras.metrics.Mean(name='reconstruction_loss')
        self.kl_loss_tracker = keras.metrics.Mean(name='kl_loss')

    @property
    def metrics(self):
        return [self.total_loss_tracker,
                self.reconstruction_loss_tracker,
                self.kl_loss_tracker]

    def train_step(self, data):
        with tf.GradientTape() as tape:
            z_mean, z_logvar = self.encoder(data)
            z = self.sampler(z_mean, z_logvar)
            reconstruction = decoder(z)
            reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(
                    keras.losses.binary_crossentropy(data, reconstruction),
                    axis=(1, 2)
                )
            )
            kl_loss = -0.5 * (1 + z_logvar - tf.square(z_mean) - tf.exp(z_logvar))
            total_loss = reconstruction_loss + tf.reduce_mean(kl_loss)
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "total_loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

vae = VAE(encoder, decoder)
vae.compile(optimizer=keras.optimizers.Adam(), run_eagerly=False)
vae.fit(zip(c1.repeat(), c2.repeat(), c3.repeat(), c4.repeat()), epochs=10, steps_per_epoch=10)
When I fit this model I get the following error:
ValueError: Data is expected to be in format `x`, `(x,)`, `(x, y)`, or `(x, y, sample_weight)`, found: (<tf.Tensor: shape=(128, 30, 1),
My issue is that this exact same model works when I only have 3 inputs instead of 4.
If I replace the lines where I specify the inputs, everything seems to work fine:
x = layers.Concatenate()([mx, gx, sx])
encoder = keras.Model([msft_inputs, googl_inputs, amzn_inputs], [z_mean, z_logvar], name = 'encoder')
vae.fit(zip(c1.repeat(), c2.repeat(), c3.repeat()), epochs = 10, steps_per_epoch = 10)
So I'm curious about what it is about my setup that is causing my model to break when I add the fourth input.
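One plausible reading of the error (a sketch, not a verified fix): Python's builtin `zip` yields plain tuples, and the Keras data adapter interprets each element as `(x, y, sample_weight)`. A 3-tuple therefore matches an accepted format (even if misread), while a 4-tuple matches none of them and raises. Zipping at the `tf.data` level and repacking each element as a one-element tuple keeps all four series inside a single `x`:

import tensorflow as tf

# Combine the four windowed datasets into one dataset whose elements are
# `(x,)`, where x is the 4-tuple of inputs, so Keras does not try to
# unpack the inputs as (x, y, sample_weight).
train_ds = tf.data.Dataset.zip(
    (c1.repeat(), c2.repeat(), c3.repeat(), c4.repeat())
).map(lambda m, g, a, s: ((m, g, a, s),))

vae.fit(train_ds, epochs=10, steps_per_epoch=10)

With this, `train_step` receives the 4-tuple as `data`; the reconstruction loss, which currently compares `reconstruction` against `data` directly, would still need to pick its target out of that tuple.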

Training by parts in Keras

I need your help.
My project is about developing an image analysis algorithm for the quantification of ferritin in the sclera.
My code mixes segmentation and regression in a sequential model with a single output (i.e., both are trained at the same time). I want the training to be done in parts: first run the segmentation training, record the result, then start the regression training.
The inputs of the segmentation are the images plus their masks; the inputs of the regression are the images plus the ferritin values.
Segmentation and regression layers are renamed, because some of them share the same name when backbones are used:
def load_model(segmentation_model, regression_model, width, height, num_classes=1):
    # Rename segmentation model layers and weights
    for layer in segmentation_model.layers:
        rename(segmentation_model, layer, layer.name + '_seg')
    #for i, w in enumerate(segmentation_model.weights):
    #    split_name = w.name.split('/')
    #    new_name = split_name[0] + '_seg' + '/' + split_name[1]
    #    segmentation_model.weights[i]._handle_name = new_name

    # Rename regression model layers
    for layer in regression_model.layers:
        rename(regression_model, layer, layer.name + '_reg')
    #for i, w in enumerate(regression_model.weights):
    #    split_name = w.name.split('/')
    #    new_name = split_name[0] + '_reg' + '/' + split_name[1]
    #    regression_model.weights[i]._handle_name = new_name

    image = layers.Input(shape=(width, height, 3), name="img")
    mask_image = segmentation_model(image)
    if num_classes == 1:
        mask_image_categorical = K.cast(K.squeeze(mask_image, axis=3) + 0.5, dtype='int32')  # Threshold at 0.5
    else:
        mask_image_categorical = K.argmax(mask_image, axis=3)
    masked_layer = mylayers.CustomMasking(mask_value=0)
    masked_image = masked_layer.call([image, mask_image_categorical])
    value = regression_model(masked_image)
    m = models.Model(inputs=image, outputs=[mask_image, value])
    #m = models.Model(inputs=image, outputs=[mask_image, value, mask_image_categorical, masked_image])
    #for i, w in enumerate(m.weights): print(i, w.name)
    m.summary()
    return m
def make_train(model, regression_loss_weight, regression_loss_weight_max, train_generator, epochs,
               val_dataset, validation_steps, weights_path, logger_path, num_classes, focal,
               enable_plot, init_mode=False):
    optimizer = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    regression_loss_weight_variable = K.variable(regression_loss_weight, name='regression_loss_weight')
    mse = losses.MeanSquaredError()
    weighted_mse = weighted_loss(mse, regression_loss_weight_variable)
    weighted_mse.trainable = False
    if focal:
        if num_classes == 1:
            bce = mylosses.binary_focal_loss
            weighted_bce = weighted_loss(bce, 1 - regression_loss_weight_variable)
            loss = {'model_segmentation': weighted_bce, 'model_regression': weighted_mse}
        else:
            fce = mylosses.categorical_focal_loss
            weighted_fce = weighted_loss(fce, 1 - regression_loss_weight_variable)
            loss = {'model_segmentation': weighted_fce, 'model_regression': weighted_mse}
    else:
        if num_classes == 1:
            bce = losses.BinaryCrossentropy(from_logits=True)
            weighted_bce = weighted_loss(bce, 1 - regression_loss_weight_variable)
            loss = {'model_segmentation': weighted_bce, 'model_regression': weighted_mse}
        else:
            cce = losses.CategoricalCrossentropy()
            weighted_cce = weighted_loss(cce, 1 - regression_loss_weight_variable)
            loss = {'model_segmentation': weighted_cce, 'model_regression': weighted_mse}
    metric = metrics.BinaryAccuracy() if num_classes == 1 else metrics.CategoricalAccuracy()
    metric_array_segmentation = [metric, mymetrics.iou_score_threshold, mymetrics.f1_score_threshold]
    metric_array_regression = [metrics.RootMeanSquaredError(), metrics.MeanAbsoluteError(), metrics.MeanAbsolutePercentageError()]
    metric_dict = {'model_segmentation': metric_array_segmentation, 'model_regression': metric_array_regression}
    loss_weights = [1.0, 1.0]  # Weight for regression is taken into account in weighted_mse loss function
    model.compile(optimizer, loss, metric_dict, loss_weights)

    loss_weight_callback = LossWeightsCallback(regression_loss_weight_variable, regression_loss_weight, regression_loss_weight_max, epochs, 'val_model_segmentation_f1-score')
    freezing_callback = SegmentationModelFreezingCallback(model, 'val_model_segmentation_f1-score', 0.95)
    checkpoint = ModelCheckpoint(weights_path, monitor='val_model_regression_root_mean_squared_error', mode='min', verbose=1, save_best_only=True)
    csv_logger = CSVLogger(logger_path, append=True, separator=';')
    lr_reducer = ReduceLROnPlateau(monitor='val_model_regression_root_mean_squared_error', mode='min', factor=0.2, patience=10, min_lr=10e-7, min_delta=0.01, verbose=1)
    earlystopping = EarlyStopping(monitor='val_model_regression_root_mean_squared_error', mode='min', verbose=1, patience=20, restore_best_weights=True)
    callbacks_list = [loss_weight_callback, freezing_callback, checkpoint, csv_logger, lr_reducer, earlystopping]

    # Test custom masking layer or global model
    #instance = train_generator[0]
    #imgs = np.squeeze(instance[0], axis=3) if instance[0].shape[3] == 1 else instance[0]
    #imsave("unmasked_img.png", imgs[0])
    #masks = np.squeeze(instance[1]['model_segmentation'], axis=3) if instance[1]['model_segmentation'].shape[3] == 1 else instance[1]['model_segmentation']
    #imsave("mask.png", masks[0] * 255)
    #masked_layer = mylayers.CustomMasking(mask_value=0)
    #masked_imgs = masked_layer.call([imgs, masks])
    #img = K.eval(masked_imgs[0,:,:,:])
    #imsave("masked_img.png", img)
    #y = model(imgs)
    #mask_image = y[0][0,:,:,:]
    #value = K.eval(y[1][0])

    if init_mode:
        instance = train_generator[0]
        model.train_on_batch(instance[0][:1], [instance[1]['model_segmentation'][:1], instance[1]['model_regression'][:1]])
    else:
        results = model.fit(train_generator, epochs=epochs, validation_data=val_dataset, validation_steps=validation_steps, callbacks=callbacks_list)
        model.save_weights(weights_path)

        # Display of metrics and loss vs epochs: metric names must match the metric functions
        if enable_plot:
            if num_classes == 1:
                plot_history(results,
                             metrics=['model_segmentation_binary_accuracy', 'val_model_segmentation_binary_accuracy', 'model_regression_root_mean_squared_error', 'val_model_regression_root_mean_squared_error', 'model_regression_mean_absolute_error', 'val_model_regression_mean_absolute_error', 'model_regression_mean_absolute_percentage_error', 'val_model_regression_mean_absolute_percentage_error'],
                             losses=['model_segmentation_loss', 'model_regression_loss', 'val_model_segmentation_loss', 'val_model_regression_loss'])
            else:
                plot_history(results,
                             metrics=['model_segmentation_categorical_accuracy', 'val_model_segmentation_categorical_accuracy', 'model_segmentation_mean_io_u', 'val_model_segmentation_mean_io_u', 'model_segmentation_f1-score', 'val_model_segmentation_f1-score', 'model_regression_root_mean_squared_error', 'val_model_regression_root_mean_squared_error', 'model_regression_mean_absolute_error', 'val_model_regression_mean_absolute_error', 'model_regression_mean_absolute_percentage_error', 'val_model_regression_mean_absolute_percentage_error'],
                             losses=['model_segmentation_loss', 'model_regression_loss', 'val_model_segmentation_loss', 'val_model_regression_loss'])
Defining the model and loading the weights are not enough when using a custom model and/or layer: the model must also be compiled and trained on a minimal set in order to be initialized.
def init_model(model, loss_weight, loss_weight_max, train_generator, epochs, weights_path, logger_path, num_classes, focal):
    make_train(model, loss_weight, loss_weight_max, train_generator, epochs, None, None, '', logger_path, num_classes, focal, False, True)
    model.load_weights(weights_path)
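No answer is recorded here, but the stated goal (segmentation first, then regression) maps onto a standard two-stage Keras pattern: train and save the segmentation model on (image, mask) pairs, freeze it, then build the combined model so that only the regression branch receives gradient updates. A minimal sketch under those assumptions (the generator names `seg_generator` and `reg_generator` are placeholders, not from the original code):

# Stage 1: train segmentation alone on (images, masks) and save the result.
segmentation_model.compile(optimizer='adam', loss='binary_crossentropy')
segmentation_model.fit(seg_generator, epochs=epochs_seg)
segmentation_model.save_weights('segmentation.h5')

# Stage 2: freeze the segmentation branch before wiring the combined model,
# so fitting it only updates the regression weights.
segmentation_model.trainable = False
model = load_model(segmentation_model, regression_model, width, height)
model.compile(optimizer='adam',
              loss={'model_segmentation': 'binary_crossentropy',
                    'model_regression': 'mse'},
              loss_weights={'model_segmentation': 0.0,  # branch is frozen anyway
                            'model_regression': 1.0})
model.fit(reg_generator, epochs=epochs_reg)

Note that `reg_generator` would still have to yield a target for the `model_segmentation` output (a dummy mask is fine, since its loss weight is zero), because a compiled multi-output model expects one target per output.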

Unhashable Type TypeError: Tensors are unhashable. Instead, use tensor.ref() as the key

I am trying to implement a custom variational autoencoder. Following is the code to reproduce.
epsilon_std = 1.0

vx = tf.keras.layers.Input(batch_shape=(None, max_length_output), name='vae_enc_in')
vx_emb = tf.keras.layers.Embedding(
    vocab_tar_size,
    embedding_dim,
    input_length=max_length_output,
    name='vae_enc_emb'
)(vx)
vxbi = tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(units, return_sequences=False, recurrent_dropout=0.2, name='vae_enc_lstm'),
    merge_mode='concat'
)(vx_emb)
vx_drop = tf.keras.layers.Dropout(0.2, name='vae_enc_drop')(vxbi)
vx_dense = tf.keras.layers.Dense(units, activation='linear', name='vae_enc_dense')(vx_drop)
vx_elu = tf.keras.layers.ELU(name='vae_enc_elu')(vx_dense)
vx_drop1 = tf.keras.layers.Dropout(0.2, name='vae_enc_drop2')(vx_elu)
z_mean = tf.keras.layers.Dense(20, name='vae_enc_dense2')(vx_drop1)
z_log_var = tf.keras.layers.Dense(20, name='vae_enc_dense3')(vx_drop1)

def sampling(args):
    z_mean, z_log_var = args
    epsilon = tf.random.normal(shape=(BATCH_SIZE, 20), mean=0., stddev=epsilon_std)
    return z_mean + tf.math.exp(z_log_var / 2) * epsilon

z = tf.keras.layers.Lambda(sampling, output_shape=(20,), name='vae_lambda')([z_mean, z_log_var])
repeated_context = tf.keras.layers.RepeatVector(max_length_output, name='vae_repeat')
decoder_h = tf.keras.layers.LSTM(units, return_sequences=True, recurrent_dropout=0.2, name='vae_dec_lstm')
decoder_mean = tf.keras.layers.TimeDistributed(
    tf.keras.layers.Dense(vocab_tar_size, activation='linear', name='vae_dec_lstm'),
    name='vae_dec_time_dist'
)
h_decoded = decoder_h(repeated_context(z))
x_decoded_mean = decoder_mean(h_decoded)

def zero_loss(y_true, y_pred):
    print("ZERO LOSS")
    return tf.zeros_like(y_pred)
And then creating a custom VAE layer:
class VAELayer(tf.keras.layers.Layer):
    def __init__(self, batch_size, max_len, **kwargs):
        self.is_placeholder = True
        super(VAELayer, self).__init__(**kwargs)
        self.target_weights = tf.constant(np.ones((batch_size, max_len)), tf.float32)

    def vae_loss(self, x, x_decoded_mean):
        #xent_loss = K.sum(metrics.categorical_crossentropy(x, x_decoded_mean), axis=-1)
        labels = tf.cast(x, tf.int32)
        xent_loss = tf.math.reduce_sum(
            tfa.seq2seq.sequence_loss(
                x_decoded_mean,
                labels,
                weights=self.target_weights,
                average_across_timesteps=False,
                average_across_batch=False
            ),
            axis=-1
        )
        #softmax_loss_function=softmax_loss_f), axis=-1)#, for sampled softmax
        kl_loss = -0.5 * tf.math.reduce_sum(1 + z_log_var - tf.math.square(z_mean) - tf.math.exp(z_log_var), axis=-1)
        return tf.math.reduce_mean(xent_loss + kl_loss)

    def call(self, inputs):
        x = inputs[0]
        x_decoded_mean = inputs[1]
        print(x.shape, x_decoded_mean.shape)
        loss = self.vae_loss(x, x_decoded_mean)
        print("Adding loss")
        self.add_loss(loss, inputs=inputs)
        print("Returning ones like")
        return tf.ones_like(x)
I compiled it successfully and also produced a test output by calling the model, but when I try to train it, it produces the error:
TypeError: Tensors are unhashable. (KerasTensor(type_spec=TensorSpec(shape=(), dtype=tf.float32, name=None), name='tf.math.reduce_sum_25/Sum:0', description="created by layer 'tf.math.reduce_sum_25'"))Instead, use tensor.ref() as the key.
Following is the code for compiling and fitting the model
loss_layer = VAELayer(BATCH_SIZE, max_length_output)([vx, x_decoded_mean])
vae = tf.keras.Model(vx, [loss_layer], name='VariationalAutoEncoderLayer')
opt = tf.keras.optimizers.Adam(lr=0.01)  #SGD(lr=1e-2, decay=1e-6, momentum=0.9, nesterov=True)
vae.compile(optimizer=opt, loss=[zero_loss])

def vae_sentence_generator():
    for ip, tg in train_dataset:
        yield tg.numpy()

vae.fit(vae_sentence_generator(steps_per_epoch=steps_per_epoch, epochs=10))
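The unhashable tensor named in the error is a `reduce_sum` KerasTensor, which points at the KL term: `vae_loss` closes over the module-level symbolic tensors `z_mean` and `z_log_var` instead of tensors that flow through the layer's `call`. A common way out (a sketch, not a verified fix) is to pass them in as inputs. Separately, note that `steps_per_epoch` and `epochs` are being passed to `vae_sentence_generator()` rather than to `fit()`; presumably `vae.fit(vae_sentence_generator(), steps_per_epoch=steps_per_epoch, epochs=10)` was intended.

class VAELayer(tf.keras.layers.Layer):
    # __init__ unchanged from the original; only call() differs: z_mean and
    # z_log_var now arrive as inputs instead of being captured from module scope.
    def call(self, inputs):
        x, x_decoded_mean, z_mean_in, z_log_var_in = inputs
        labels = tf.cast(x, tf.int32)
        xent_loss = tf.math.reduce_sum(
            tfa.seq2seq.sequence_loss(
                x_decoded_mean, labels, weights=self.target_weights,
                average_across_timesteps=False, average_across_batch=False
            ),
            axis=-1
        )
        kl_loss = -0.5 * tf.math.reduce_sum(
            1 + z_log_var_in - tf.math.square(z_mean_in) - tf.math.exp(z_log_var_in),
            axis=-1
        )
        self.add_loss(tf.math.reduce_mean(xent_loss + kl_loss))
        return tf.ones_like(x)

loss_layer = VAELayer(BATCH_SIZE, max_length_output)(
    [vx, x_decoded_mean, z_mean, z_log_var]
)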

DeblurGAN can't load his own weights anymore

Hey, I really need some help =)
Firstly, sorry that it's so long, but I hope that you don't need the full code at the end.
I coded a GAN for deblurring and am now training it. The first 71 epochs trained without any problems: I trained some epochs until the Colab GPU time limit was reached, and the next day I loaded my weights into the GAN and continued training.
2 or 3 weeks ago I wanted to load the weights of epoch 71 into my GAN, but I received the following error (I'm quite sure that I didn't change anything in the code). Since that moment I can only load the first 65 weights, and I get the same error for every epoch higher than 65:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-16-a35c9a2bbf3a> in <module>()
1 # Load weights
----> 2 gan.load_weights(F"/content/gdrive/My Drive/Colab Notebooks/data/deblurGAN_weights66_batchsize_1.h5")
5 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in load_weights(self, filepath, by_name, skip_mismatch, options)
2209 f, self.layers, skip_mismatch=skip_mismatch)
2210 else:
-> 2211 hdf5_format.load_weights_from_hdf5_group(f, self.layers)
2212
2213 def _updated_config(self):
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/saving/hdf5_format.py in load_weights_from_hdf5_group(f, layers)
706 str(len(weight_values)) + ' elements.')
707 weight_value_tuples += zip(symbolic_weights, weight_values)
--> 708 K.batch_set_value(weight_value_tuples)
709
710
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs)
199 """Call target, and fall back on dispatchers if there is a TypeError."""
200 try:
--> 201 return target(*args, **kwargs)
202 except (TypeError, ValueError):
203 # Note: convert_to_eager_tensor currently raises a ValueError, not a
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py in batch_set_value(tuples)
3574 if ops.executing_eagerly_outside_functions():
3575 for x, value in tuples:
-> 3576 x.assign(np.asarray(value, dtype=dtype(x)))
3577 else:
3578 with get_graph().as_default():
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/resource_variable_ops.py in assign(self, value, use_locking, name, read_value)
856 with _handle_graph(self.handle):
857 value_tensor = ops.convert_to_tensor(value, dtype=self.dtype)
--> 858 self._shape.assert_is_compatible_with(value_tensor.shape)
859 assign_op = gen_resource_variable_ops.assign_variable_op(
860 self.handle, value_tensor, name=name)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/tensor_shape.py in assert_is_compatible_with(self, other)
1132 """
1133 if not self.is_compatible_with(other):
-> 1134 raise ValueError("Shapes %s and %s are incompatible" % (self, other))
1135
1136 def most_specific_compatible_shape(self, other):
ValueError: Shapes (4, 4, 64, 128) and (64,) are incompatible
I was looking for a solution for a long time and didn't find a real one. But I found out that if I train one epoch with one of the old weights (1-65), afterwards I can load one of the new weights. So I thought I could use this "workaround", but yesterday I plotted the scores of the test-dataset metric for every epoch and received this picture:
[plot: PSNR score per epoch]
As you can see, it looks like I've been producing trash since epoch 65 (in the picture since 60, because I lost the first 5 epochs, so it starts at 6).
I'm really frustrated and hope that someone can help me =D
Here's the full code of the GAN:
# Libraries to build the model
from tensorflow import pad
from tensorflow.keras.layers import Layer
from keras.layers import Input, Activation, Add, UpSampling2D
from keras.layers.merge import Add
from keras.layers.core import Dropout, Dense, Flatten
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.core import Lambda
from keras.layers.normalization import BatchNormalization
from keras.models import Model
import keras.backend as K
from keras.applications.vgg16 import VGG16
from keras.optimizers import Adam
import keras

# Reflection padding
from keras.engine import InputSpec
import tensorflow as tf
from keras.engine.topology import Layer

'''
2D Reflection Padding
Attributes:
  - padding: (padding_width, padding_height) tuple
'''
class ReflectionPadding2D(Layer):
    def __init__(self, padding=(1, 1), **kwargs):
        self.padding = tuple(padding)
        self.input_spec = [InputSpec(ndim=4)]
        super(ReflectionPadding2D, self).__init__(**kwargs)

    def compute_output_shape(self, s):
        """ If you are using "channels_last" configuration"""
        return (s[0], s[1] + 2 * self.padding[0], s[2] + 2 * self.padding[1], s[3])

    def call(self, x, mask=None):
        w_pad, h_pad = self.padding
        return tf.pad(x, [[0, 0], [h_pad, h_pad], [w_pad, w_pad], [0, 0]], 'REFLECT')

# Res Block
def res_block(input, filters, kernel_size=(3, 3), strides=(1, 1), use_dropout=False):
    """
    Instantiate a Keras Resnet Block using sequential API.
    :param input: Input tensor
    :param filters: Number of filters to use
    :param kernel_size: Shape of the kernel for the convolution
    :param strides: Shape of the strides for the convolution
    :param use_dropout: Boolean value to determine the use of dropout
    :return: Keras Model
    """
    x = ReflectionPadding2D((1, 1))(input)
    x = Conv2D(filters=filters,
               kernel_size=kernel_size,
               strides=strides)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    if use_dropout:
        x = Dropout(0.5)(x)

    x = ReflectionPadding2D((1, 1))(x)
    x = Conv2D(filters=filters,
               kernel_size=kernel_size,
               strides=strides)(x)
    x = BatchNormalization()(x)

    # Two convolution layers followed by a direct connection between input and output (skip connection)
    out = Add()([input, x])
    return out

# Generator
n_res_blocks = 9

def generator_model():
    # encoder
    inputs = Input(shape=img_shape)

    x = ReflectionPadding2D((3, 3))(inputs)
    x = Conv2D(filters=64, kernel_size=(7, 7), padding='valid')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(128, (3, 3), strides=2, padding='same')(x)  # DIM(15,15,128)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(256, (3, 3), strides=2, padding='same')(x)  # DIM(7,7,256)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # Apply 9 res blocks
    for i in range(n_res_blocks):
        x = res_block(x, 256, use_dropout=True)

    # decoder
    #x = Conv2DTranspose(128, (3,3), strides = 2, padding = 'same')(x)
    x = UpSampling2D()(x)
    x = Conv2D(filters=128, kernel_size=(3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    #x = Conv2DTranspose(64, (3,3), strides = 2, padding = 'same')(x)
    x = UpSampling2D()(x)
    x = Conv2D(filters=64, kernel_size=(3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = ReflectionPadding2D((3, 3))(x)
    x = Conv2D(filters=3, kernel_size=(7, 7), padding='valid')(x)
    x = Activation('tanh')(x)

    # Add direct connection from input to output and recenter to [-1, 1] (skip connection)
    outputs = Add()([x, inputs])
    outputs = Lambda(lambda z: z / 2)(outputs)  # to keep normalized outputs

    model = Model(inputs=inputs, outputs=outputs, name='Generator')
    return model

# Discriminator
def discriminator_model():
    Input_img = Input(shape=(img_shape))

    x = Conv2D(filters=64, kernel_size=(4, 4), strides=2, padding='same')(Input_img)
    x = LeakyReLU(0.2)(x)

    nf_mult, nf_mult_prev = 1, 1
    for n in range(3):
        nf_mult_prev, nf_mult = nf_mult, min(2**n, 8)
        x = Conv2D(filters=64*nf_mult, kernel_size=(4, 4), strides=2, padding='same')(x)
        x = BatchNormalization()(x)
        x = LeakyReLU(0.2)(x)

    nf_mult_prev, nf_mult = nf_mult, 8
    x = Conv2D(filters=64*nf_mult, kernel_size=(4, 4), strides=1, padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(0.2)(x)

    x = Conv2D(filters=1, kernel_size=(4, 4), strides=1, padding='same')(x)
    x = Flatten()(x)
    x = Dense(1024, activation='tanh')(x)
    x = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=Input_img, outputs=x, name='discriminator')
    return model

def gan_model(generator, discriminator):
    inputs = Input(shape=img_shape)
    generated_images = generator(inputs)
    outputs = discriminator(generated_images)
    model = Model(inputs=inputs, outputs=[generated_images, outputs])
    return model

# Losses
# Wasserstein loss:
def wasserstein_loss(y_true, y_pred):
    return K.mean(y_true * y_pred)

# vgg16 model for perceptual loss
vgg = VGG16(include_top=False, weights='imagenet', input_shape=img_shape)
loss_model = Model(inputs=vgg.input, outputs=vgg.get_layer('block3_conv3').output)
loss_model.trainable = False

# perceptual loss:
def perceptual_loss(y_true, y_pred):
    return K.mean(K.square(loss_model(y_true) - loss_model(y_pred)))

# Metrics:
# SSIM:
def ssim_metric(y_true, y_pred):
    return tf.reduce_mean(tf.image.ssim(tf.convert_to_tensor(y_true), tf.convert_to_tensor(y_pred), max_val=1.0))

# PSNR:
def psnr_metric(y_true, y_pred):
    return tf.reduce_mean(tf.image.psnr(y_true, y_pred, max_val=1.0))

def training(epochs, batch_size):
    path_psnr = F"/content/gdrive/My Drive/Colab Notebooks/data/psnr"
    path_ssim = F"/content/gdrive/My Drive/Colab Notebooks/data/ssim"
    GAN_losses = []
    #psnrs = []
    #ssims = []
    random_idx = np.arange(0, X_train.shape[0])
    n_batches = int(len(random_idx) / batch_size)  # divide training set into batches of batch_size
    for e in range(epochs):
        #weights_name = "deblurGAN_weights%s_batchsize_%r.h5" %(e + 66, batch_size)
        weights_name = "deblurGAN_weights_test.h5"
        print("epoch: %s " % (e + 66))
        # randomize index of training set
        random.shuffle(random_idx)
        for i in range(n_batches):
            img_batch_blured = X_train[i*batch_size:(i+1)*batch_size]
            img_batch_generated = generator.predict(img_batch_blured)
            img_batch_original = Y_train[i*batch_size:(i+1)*batch_size]
            img_batch = np.concatenate((img_batch_generated, img_batch_original), 0)
            valid0 = -np.ones(batch_size)
            valid1 = np.ones(batch_size)
            valid = np.concatenate((valid0, valid1))
            discriminator.trainable = True
            for k in range(5):
                loss = discriminator.train_on_batch(img_batch, valid)
            discriminator.trainable = False
            GAN_loss = gan.train_on_batch(img_batch_blured, [img_batch_original, valid1])
            GAN_losses.append(GAN_loss)
            if (100*i/n_batches).is_integer():
                psnr = psnr_metric(img_batch_original, img_batch_generated)
                ssim = ssim_metric(img_batch_original, img_batch_generated)
                psnrs.append(psnr)
                ssims.append(ssim)
                # creating 2 files in Google Drive where the psnr and ssim data will be saved.
                pickle.dump(psnrs, open(path_psnr, "wb"))
                pickle.dump(ssims, open(path_ssim, "wb"))
                print((100*i/n_batches) + 1, "% psnr: ", psnr, " ssim: ", ssim)
        # Save weights: modify the path to your directory
        gan.save_weights(F"/content/gdrive/My Drive/Colab Notebooks/data/{weights_name}")
    return [GAN_losses, psnrs, ssims]

# Initialize models
generator = generator_model()
discriminator = discriminator_model()
gan = gan_model(generator, discriminator)

# Initialize optimizers
d_opt = Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
gan_opt = Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

# Compile models
discriminator.trainable = True
discriminator.compile(optimizer=d_opt, loss=wasserstein_loss)
discriminator.trainable = False
loss = [perceptual_loss, wasserstein_loss]
loss_weights = [100, 1]
gan.compile(optimizer=gan_opt, loss=loss, loss_weights=loss_weights)
discriminator.trainable = True

gan.summary()

# Load weights
gan.load_weights(F"/content/gdrive/My Drive/Colab Notebooks/data/deblurGAN_weights66_batchsize_1.h5")

# connect to GPU
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

loss = training(1, 1)  # epochs, batchsize
It is solved and can be closed. I didn't know that the `discriminator.trainable = True/False` toggling had been changed. It seems to be the reason for a different ordering of the weights.
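In other words, flipping `discriminator.trainable` around compile time changed the order of the weight tensors inside the saved HDF5 file, so a later `load_weights` tried to assign mismatched shapes. One hedged way to make checkpoints robust to this (a sketch, not from the original post) is to save and restore the sub-models separately, since each sub-model's own weight order is stable:

# Sketch: checkpoint the sub-models individually so the weight order inside
# each file no longer depends on the trainable flags of the combined GAN.
generator.save_weights('/content/gdrive/My Drive/Colab Notebooks/data/generator_weights.h5')
discriminator.save_weights('/content/gdrive/My Drive/Colab Notebooks/data/discriminator_weights.h5')

# ...later, after rebuilding the models exactly as before:
generator.load_weights('/content/gdrive/My Drive/Colab Notebooks/data/generator_weights.h5')
discriminator.load_weights('/content/gdrive/My Drive/Colab Notebooks/data/discriminator_weights.h5')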