I am getting the following error:
KeyError Traceback (most recent call last)
<ipython-input-254-f01ba8163f7d> in <module>
1 out_batch = NBatchLogger(display=1000)
2 model.fit(X_train, Y_train, epochs=1000, batch_size=250,verbose = 0,
----> 3 callbacks=[out_batch])
1 frames
<ipython-input-247-55bb2505c62e> in on_batch_end(self, batch, logs)
14 def on_batch_end(self, batch, logs={}):
15 self.step += 1
---> 16 for k in self.params['metrics']:
17 if k in logs:
18 self.metric_cache[k] = self.metric_cache.get(k, 0) + logs[k]
KeyError: 'metrics'
Here is my code:
class PrintProgress(keras.callbacks.Callback):
def on_epoch_end(self, epoch, logs):
if epoch % 100 == 0: print('Epoch', epoch)
class NBatchLogger(keras.callbacks.Callback):
"""
A Logger that logs average performance every `display` steps.
"""
def __init__(self, display):
self.step = 0
self.display = display
self.metric_cache = {}
def on_batch_end(self, batch, logs={}):
self.step += 1
for k in self.params['metrics']:
if k in logs:
self.metric_cache[k] = self.metric_cache.get(k, 0) + logs[k]
if self.step % self.display == 0:
metrics_log = ''
for (k, v) in self.metric_cache.items():
val = v / self.display
if abs(val) > 1e-3:
metrics_log += ' - %s: %.4f' % (k, val)
else:
metrics_log += ' - %s: %.4e' % (k, val)
print('step: {}/{} ... {}'.format(self.step,
self.params['steps'],
metrics_log))
self.metric_cache.clear()
tf.keras.backend.clear_session()
When trying to compute the confusion matrix
confusion_matrix(np.argmax(Y_train, axis = 1), pred_train)
I got the following error:
ValueError: Classification metrics can't handle a mix of multiclass and continuous-multioutput targets
The callback's params only contain the values used in the fit call (in this case verbose, epochs and steps). If you want to access the model's metrics from within the callback, you need to set the model for the callback with
out_batch.set_model(model)
And then access them with self.model.metrics inside the callback's methods.
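For example, the call site from the question would then look like this (a minimal sketch, reusing the fit call above):
out_batch = NBatchLogger(display=1000)
out_batch.set_model(model)  # makes self.model available inside the callback
model.fit(X_train, Y_train, epochs=1000, batch_size=250, verbose=0,
          callbacks=[out_batch])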
Here is your Callback implementation with fixes:
class NBatchLogger(keras.callbacks.Callback):
"""
A Logger that logs average performance every `display` steps.
"""
def __init__(self, display):
self.step = 0
self.display = display
self.metric_cache = {}
def on_batch_end(self, batch, logs=None):
self.step += 1
for k in self.model.metrics:
if k.name not in self.metric_cache.keys():
self.metric_cache[k.name] = 0.0
self.metric_cache[k.name] += logs.get(k.name)
if self.step % self.display == 0:
metrics_log = ''
for (k, v) in self.metric_cache.items():
val = v / self.display
if abs(val) > 1e-3:
metrics_log += ' - %s: %.4f' % (k, val)
else:
metrics_log += ' - %s: %.4e' % (k, val)
print('step: {}/{} ... {}'.format(self.step,
self.params['steps'],
metrics_log))
self.metric_cache.clear()
And the output I got:
step: 10/240 ... - loss: 2.2448 - accuracy: 0.2207
step: 20/240 ... - loss: 2.0735 - accuracy: 0.3876
step: 30/240 ... - loss: 1.8155 - accuracy: 0.4899
step: 40/240 ... - loss: 1.5696 - accuracy: 0.5502
step: 50/240 ... - loss: 1.3779 - accuracy: 0.6002
step: 60/240 ... - loss: 1.2252 - accuracy: 0.6412
EDIT:
To fix the error ValueError: Classification metrics can't handle a mix of multiclass and continuous-multioutput targets with the confusion matrix, you should change
confusion_matrix(np.argmax(y_train, axis=1), pred_train)
to
confusion_matrix(np.argmax(y_train, axis=1), np.argmax(pred_train, axis=1))
because you need to convert the predicted labels the same way as the training labels.
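Spelled out (a small sketch, assuming pred_train comes from model.predict and Y_train is one-hot encoded, as in the fit call above):
import numpy as np
from sklearn.metrics import confusion_matrix

# model.predict returns one row of class probabilities per sample
# (continuous-multioutput), so both sides are reduced to class indices
pred_labels = np.argmax(pred_train, axis=1)   # predicted class ids
true_labels = np.argmax(Y_train, axis=1)      # one-hot targets -> class ids
cm = confusion_matrix(true_labels, pred_labels)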
Related
I am solving a time-series problem using an LSTM VAE (variational autoencoder). I have built my VAE model as below:
# imports used by the code below
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, Dense, Lambda, LSTM, LeakyReLU,
                                     Dropout, RepeatVector, TimeDistributed)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import LambdaCallback, EarlyStopping
tf.compat.v1.disable_eager_execution()
class VAE:
def __init__(self,
hidden_layer_units,
hidden_layer_leakyrelu_alphas,
hidden_layer_dropout_rates,
batch_size,
time_steps,
num_features,
is_stateful_learning):
self.hidden_layer_units = hidden_layer_units
self.hidden_layer_leakyrelu_alphas = hidden_layer_leakyrelu_alphas
self.hidden_layer_dropout_rates = hidden_layer_dropout_rates
self.encoder_num_layers = 0
self.latent_space_dim = 0
vae_total_layers = len(hidden_layer_units)
if 0 < vae_total_layers:
self.encoder_num_layers = int((vae_total_layers - 1) / 2)
self.latent_space_dim = self.hidden_layer_units[self.encoder_num_layers]
self.batch_size = batch_size
self.time_steps = time_steps
self.num_features = num_features
self.is_stateful_learning = is_stateful_learning
self.encoder = None
self.decoder = None
self.model = None
self.model_input = None
self.model_output = None
self.mu = None
self.log_variance = None
self.kulback_coef = 0.0001
self._build()
def summary(self):
self.encoder.summary()
self.decoder.summary()
self.model.summary()
def compile(self, learning_rate=0.001):
optimizer = Adam(learning_rate=learning_rate)
self.model.compile(optimizer=optimizer,
loss=self._calculate_combined_loss,
metrics=[self._calculate_reconstruction_loss, self._calculate_kl_loss])
def _build(self):
self._build_encoder()
self._build_decoder()
self._build_autoencoder()
def _build_encoder(self):
encoder_input = self._add_encoder_input()
lstm_layers = self._add_encoder_lstm_layers(encoder_input)
bottleneck = self._add_bottleneck(lstm_layers)
self.model_input = encoder_input
self.encoder = Model(encoder_input, bottleneck, name="encoder")
def _build_decoder(self):
decoder_input = self._add_decoder_input()
repeater_layer = self._add_repeater_layer(decoder_input)
lstm_layer = self._add_decoder_lstm_layer(repeater_layer)
decoder_output = self._add_decoder_output(lstm_layer)
self.decoder = Model(decoder_input, decoder_output, name="decoder")
def _build_autoencoder(self):
model_input = self.model_input
encoder_output = self.encoder(model_input)
model_output = self.decoder(encoder_output)
self.model_output = model_output
self.model = Model(model_input, model_output, name="autoencoder")
def _add_encoder_input(self):
if self.is_stateful_learning:
x = Input(batch_shape=(self.batch_size, self.time_steps, self.num_features), name="encoder_input")
else:
x = Input(shape=(self.time_steps, self.num_features), name="encoder_input")
return x
def _add_encoder_lstm_layers(self, encoder_input):
""" Create all lstm layers in encoder."""
x = encoder_input
for layer_index, units in enumerate(self.hidden_layer_units[:self.encoder_num_layers]):
lstm_params = {}
if layer_index < self.encoder_num_layers - 1:
lstm_params["return_sequences"] = True
if self.is_stateful_learning:
lstm_params["stateful"] = True
x = LSTM(units=units, **lstm_params)(x)
x = LeakyReLU(alpha=self.hidden_layer_leakyrelu_alphas[layer_index])(x)
x = Dropout(rate=self.hidden_layer_dropout_rates[layer_index])(x)
return x
def _add_bottleneck(self, x):
""" add bottleneck with Guassian sampling (Dense layer)."""
self.mu = Dense(self.latent_space_dim, name="mu")(x)
self.log_variance = Dense(self.latent_space_dim, name="log_variance")(x)
x = Lambda(self.sample_point_from_normal_distribution, name="encoder_output")([self.mu, self.log_variance])
return x
def sample_point_from_normal_distribution(self, args):
mu, log_variance = args
epsilon = K.random_normal(shape=K.shape(mu), mean=0., stddev=1.)
sampled_point = mu + K.exp(log_variance / 2) * epsilon
return sampled_point
def _add_decoder_input(self):
if self.is_stateful_learning:
x = Input(batch_shape=(self.batch_size, self.latent_space_dim), name="decoder_input")
else:
x = Input(shape=(self.latent_space_dim), name="decoder_input")
return x
def _add_repeater_layer(self, decoder_input):
return RepeatVector(self.time_steps)(decoder_input)
def _add_decoder_lstm_layer(self, repeater_layer):
x = repeater_layer
for layer_index, units in enumerate(self.hidden_layer_units[self.encoder_num_layers + 1:]):
lstm_params = {}
if self.is_stateful_learning:
# stateful build
lstm_params = {'stateful': True, 'return_sequences': True}
else:
lstm_params["return_sequences"] = True
layer_no = layer_index + self.encoder_num_layers + 1
x = LSTM(units=units, **lstm_params)(x)
x = LeakyReLU(alpha=self.hidden_layer_leakyrelu_alphas[layer_no])(x)
x = Dropout(rate=self.hidden_layer_dropout_rates[layer_no])(x)
return x
def _add_decoder_output(self, lstm_layer):
return TimeDistributed(Dense(1))(lstm_layer)
def _calculate_combined_loss(self, y_target, y_predicted):
reconstruction_loss = self._calculate_reconstruction_loss(y_target, y_predicted)
kl_loss = self._calculate_kl_loss(y_target, y_predicted)
combined_loss = reconstruction_loss + (self.kulback_coef * kl_loss)
return combined_loss
def _calculate_reconstruction_loss(self, y_target, y_predicted):
error = y_target - y_predicted
reconstruction_loss = K.mean(K.square(error), axis=1)
return reconstruction_loss
def _calculate_kl_loss(self, y_target, y_predicted):
kl_loss = -0.5 * K.sum(1 + self.log_variance - K.square(self.mu) - K.exp(self.log_variance), axis=1)
return kl_loss
# Build Variational AutoEncoder(VAE) LSTM Model:
def build_lstm_neural_network(lstm_layer_units=[], leakyrelu_layer_alphas=[], dropout_layer_rates=[],
number_of_sequences=32, time_steps=32, data_dim=1, is_stateful_learning=False):
vae = VAE(
hidden_layer_units=lstm_layer_units,
hidden_layer_leakyrelu_alphas=leakyrelu_layer_alphas,
hidden_layer_dropout_rates=dropout_layer_rates,
batch_size=number_of_sequences,
time_steps=time_steps,
num_features=data_dim,
is_stateful_learning=is_stateful_learning
)
vae.compile(learning_rate)
vae.summary()
return vae.model
The model training block looks as below:
# configuration
nn_lstm_layer_units = [160, 3, 160]
nn_leakyrelu_layer_alphas = [0.0, 0.0, 0.0]
nn_dropout_layer_rates = [0.3, 0.0, 0.3]
batch_size = 96
win_length = 64
num_features = 6 # You can use single variate Timeseries data as well, num_features = 1
epochs = 782
learning_rate = 0.0001
want_stateful_learning = True
# Build LSTM VAE model
model = build_lstm_neural_network(nn_lstm_layer_units, nn_leakyrelu_layer_alphas, nn_dropout_layer_rates, batch_size,
win_length, num_features, want_stateful_learning)
TIME_STEPS = win_length
# Generate training sequences for use in the model.
def create_sequences(values, time_steps=TIME_STEPS):
output = []
for i in range(len(values) - time_steps + 1):
output.append(values[i: (i + time_steps)])
return np.stack(output)
x_train = create_sequences(x_train)
x_val = create_sequences(x_val)
callbacks = []
unfit_train_record_count = 0
unfit_val_record_count = 0
if want_stateful_learning:
# stateful learning
# adjust train data size (should be a multiple of the batch size)
unfit_train_record_count = len(x_train) % batch_size
unfit_val_record_count = len(x_val) % batch_size
# Reset states of the stateful model on epoch end
stateful_model_reset_states = LambdaCallback(on_epoch_end=lambda batch, logs: model.reset_states())
callbacks.append(stateful_model_reset_states)
early_stopping = EarlyStopping(monitor=monitor, patience=patience)
callbacks.append(early_stopping)
# Model training
history = model.fit(x=x_train[unfit_train_record_count:],
                    y=x_train[unfit_train_record_count:, :, [0]],
                    validation_data=(x_val[unfit_val_record_count:],
                                     x_val[unfit_val_record_count:, :, [0]]),
                    batch_size=batch_size, epochs=epochs,
                    shuffle=False, callbacks=callbacks)
The stateless mode of the model works as expected, but the stateful mode throws the error below:
1632/1632 [==============================] - ETA: 0s - loss: 0.2447 - _calculate_reconstruction_loss: 0.2447 - _calculate_kl_loss: 0.0326
tensorflow.python.framework.errors_impl.InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: You must feed a value for placeholder tensor 'decoder_input' with dtype float and shape [96,3]
[[{{node decoder_input}}]]
[[metrics/_calculate_reconstruction_loss/Identity/_229]]
(1) Invalid argument: You must feed a value for placeholder tensor 'decoder_input' with dtype float and shape [96,3]
[[{{node decoder_input}}]]
The environment used is:
Python-3.8.12,
Tensorflow-gpu: 2.5,
cudnn: 8.2.1.32
I am not clear why the stateful model runs one epoch on the training data but throws the error as soon as it starts to process the validation data.
I had the same experience when the dataset and the loss function were not suitable. When I tried to simulate it again, the possible symptoms were: the loss value not changing, the loss becoming NaN, or an error during validation. That can mean there is no value, no match, or the neurons are not being updated; using TensorFlow 2.x makes this a lot easier.
One possibility is a validation mismatch: training works, but validation results in errors.
Epoch 1/100
2022-01-23 21:04:59.846791: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8100
1/1 [==============================] - ETA: 0s - loss: 3.1866 - accuracy: 0.0000e+00Traceback (most recent call last):
Another possibility is a mismatched loss function: it is possible the neurons are not being updated.
Epoch 1/100
2022-01-23 21:08:23.330068: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8100
1/1 [==============================] - 3s 3s/step - loss: 13.7138 - accuracy: 0.2000 - val_loss: 8.2133 - val_accuracy: 0.0000e+00
Epoch 2/100
1/1 [==============================] - 0s 65ms/step - loss: 7.7745 - accuracy: 0.0000e+00 - val_loss: 8.0456 - val_accuracy: 0.0000e+00
I solved the problem by changing the loss-calculation logic: instead of defining functions to calculate the reconstruction and KL losses inside the VAE class, I moved the loss calculation outside the VAE class, as below:
# Build Variational AutoEncoder(VAE) LSTM Model:
def build_lstm_neural_network(lstm_layer_units=[], leakyrelu_layer_alphas=[], dropout_layer_rates=[],
number_of_sequences=32, time_steps=32, data_dim=1, is_stateful_learning=False):
vae = VAE(
hidden_layer_units=lstm_layer_units,
hidden_layer_leakyrelu_alphas=leakyrelu_layer_alphas,
hidden_layer_dropout_rates=dropout_layer_rates,
batch_size=number_of_sequences,
time_steps=time_steps,
num_features=data_dim,
is_stateful_learning=is_stateful_learning
)
# Add reconstruction loss
error = vae.model_input - vae.model_output
reconstruction_loss = K.mean(K.square(error))
vae.model.add_loss(reconstruction_loss)
vae.model.add_metric(reconstruction_loss, name='mse_loss', aggregation='mean')
# Add KL loss (kl_beta weights the KL term, like kulback_coef in the class above)
kl_loss = kl_beta * K.mean(-0.5 * K.sum(1 + vae.log_variance - K.square(vae.mu) - K.exp(vae.log_variance), axis=1), axis=0)
vae.model.add_loss(kl_loss)
vae.model.add_metric(kl_loss, name='kl_loss', aggregation='mean')
optimizer = Adam(learning_rate=vae.learning_rate, clipvalue=vae.clipvalue)
vae.model.compile(loss=None, optimizer=optimizer)
vae.summary()
return vae.model
Why is there a difference between loss: 0.2105 - accuracy: 0.9500 - val_loss: 0.5264 - val_accuracy: 0.9100 printed by fit
and
the accuracy: 0.700 printed in CustomCallback?
I'm pretty sure that CustomCallback's val_accuracy is correct.
[1]: https://i.stack.imgur.com/Wiy46.png
Callbacks =[
checkpoint_callback(start_time),
CustomCallback(val_ds, result, files_path, class_names,start_time)]
model.fit(train_ds, epochs=40, verbose = 1, callbacks=Callbacks,validation_data=val_ds,steps_per_epoch=20)
Below is the CustomCallback, which counts and prints accuracy, leakage, and overkill:
class CustomCallback(tf.keras.callbacks.Callback):
def __init__(self, val_ds, result, files_path, class_names,start_time):
super(CustomCallback, self).__init__()
self.start_time = start_time
self.val_ds = val_ds
self.result = result
self.files_path = files_path
self.class_names = class_names
def get_map(self, pred_valid, epoch):
overkill_count = 0
leakage_count = 0
good_count = 0
bad_count = 0
same = 0
for i ,f in enumerate(self.files_path):
pred = self.class_names[np.argmax(pred_valid[i])]
label = self.result[f]['label']
if pred == label :
same += 1
if label == 'good':
good_count = good_count + 1
if pred != 'good':
overkill_count = overkill_count + 1
else:
bad_count = bad_count + 1
if pred == 'good':
leakage_count = leakage_count + 1
print(".\n.\n.\n.\n.\n.\n")
print("epoch: ", epoch+1)
print("total: ",good_count+bad_count)
print("accuracy: ", same/(good_count+bad_count))
print("good count: ", good_count)
print("bad count: ", bad_count)
print("overkill count: ", overkill_count)
print("leakage count: ", leakage_count)
overkill_rate = overkill_count / good_count
leakage_rate = leakage_count / bad_count
print("overkill_rate: ", overkill_rate)
print("leakage_rate: ", leakage_rate)
def on_epoch_end(self, epoch, logs=None):
model_test = self.model
for layer in model_test.layers:
layer.trainable = False
print("model predicting.....")
pred_valid = model_test.predict(self.val_ds)
print("model predict done.")
self.get_map(pred_valid, epoch)
print("totally spends {:1f} minutes.".format((time.time()-self.start_time)/60))
You have passed 2 datasets: the training dataset and the validation dataset.
The model fits on the training dataset and does not see anything from the validation dataset during training. After each epoch, the model is evaluated on the validation dataset, which gives you a total of 4 metrics.
After each epoch, loss is the loss on the training dataset, and accuracy is the accuracy metric on the training dataset.
val_loss is the loss on the validation dataset, and val_accuracy is the accuracy on the validation dataset.
val_loss and val_accuracy are printed out so that you can compare them to the training loss and accuracy and determine whether your model is overfitting to the training dataset.
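If you want a callback to report the same number Keras prints, one option (a minimal sketch; ValAccuracyLogger is a hypothetical helper, not from the question, and it assumes the model is compiled with the accuracy metric as in the logs above) is to read it straight from logs, which Keras fills in after the validation pass:
import tensorflow as tf

class ValAccuracyLogger(tf.keras.callbacks.Callback):
    # hypothetical helper: echoes the val_accuracy Keras itself computed
    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        print("epoch {}: val_accuracy = {}".format(epoch + 1,
                                                   logs.get("val_accuracy")))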
I want to use a custom loss function, the quantile (pinball) loss, in XGBRegressor.
I use this code:
def xgb_quantile_eval(preds, dmatrix, quantile=0.2):
labels = dmatrix.get_label()
return ('q{}_loss'.format(quantile),
np.nanmean((preds >= labels) * (1 - quantile) * (preds - labels) +
(preds < labels) * quantile * (labels - preds)))
def xgb_quantile_obj(preds, dmatrix, quantile=0.2):
try:
assert 0 <= quantile <= 1
except AssertionError:
raise ValueError("Quantile value must be float between 0 and 1.")
labels = dmatrix.get_label()
errors = preds - labels
left_mask = errors < 0
right_mask = errors > 0
grad = -quantile * left_mask + (1 - quantile) * right_mask
hess = np.ones_like(preds)
return grad, hess
And I build the model like this:
def XGB(q, X_train, Y_train, X_valid, Y_valid, X_test):
# (a) Modeling
model = XGBRegressor(objective=xgb_quantile_obj, alpha=q,
n_estimators=10000, bagging_fraction=0.7, learning_rate=0.027, subsample=0.7)
model.fit(X_train, Y_train, eval_metric = xgb_quantile_eval,
eval_set=[(X_valid, Y_valid)], early_stopping_rounds=300, verbose=500)
# (b) Predictions
pred = pd.Series(model.predict(X_test).round(2))
return model, pred
But I got an error when running it:
models_2, results_2 = XGB(0.5, X_train_1, Y_train_1, X_valid_1, Y_valid_1, X_test)
results_2
AttributeError: 'numpy.ndarray' object has no attribute 'get_label'
I am not sure if I am doing this correctly. Please help me.
Oh, I caught the error: I had to swap the order of preds and dmatrix in xgb_quantile_obj and change dmatrix to labels, because the scikit-learn wrapper passes the objective plain arrays as (labels, preds) rather than a DMatrix.
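A sketch of what that change might look like (assuming the scikit-learn wrapper, which hands the objective plain NumPy arrays rather than a DMatrix):
import numpy as np

def xgb_quantile_obj(labels, preds, quantile=0.2):
    # in the sklearn API the objective receives (y_true, y_pred) as arrays,
    # so there is no DMatrix and no .get_label() call
    if not 0 <= quantile <= 1:
        raise ValueError("Quantile value must be float between 0 and 1.")
    errors = preds - labels
    left_mask = errors < 0
    right_mask = errors > 0
    grad = -quantile * left_mask + (1 - quantile) * right_mask
    hess = np.ones_like(preds)
    return grad, hess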
placeholder_X = tf.placeholder(tf.float32, shape = [None, 19])
placeholder_y = tf.placeholder(tf.float32, shape = [None,1])
#Build an iterator over training batches
#training_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
training_dataset = tf.data.Dataset.from_tensor_slices((placeholder_X, placeholder_y))
#Shuffle the dataset (note shuffle argument much larger than training size).learning_rate # shuffling of data
# and form batches of size batch_size
training_batches = training_dataset.shuffle(20000, reshuffle_each_iteration =True).repeat().batch(FLAGS.batch_size)
#training_iterator = tf.data.make_one_shot_iterator(training_batches)
#Building iterator over the heldout set with batch_size = heldout_size,
# i.e., return the entire heldout set as a constant.
val_dataset = tf.data.Dataset.from_tensor_slices((placeholder_X, placeholder_y))
val_batches = val_dataset.repeat().batch(500)
#heldout_iterator = tf.data.make_one_shot_iterator(heldout_batches)
test_dataset = tf.data.Dataset.from_tensor_slices((X_test,y_test))
test_dataset = test_dataset.batch(500)
#Combine these into a feasible iterator that can switch between training
# and validation inputs.
# Here should be minibatch increment be defined
handle = tf.placeholder(tf.string, shape = [])
feedable_iterator = tf.data.Iterator.from_string_handle(handle, training_batches.output_types, training_batches.output_shapes)
features_final, labels_final = feedable_iterator.get_next()
#create Reinitializable iterator for Train and Validation, one hot iterator for Test
train_val_iterator = tf.data.Iterator.from_structure(training_batches.output_types, training_batches.output_shapes)
training_iterator = train_val_iterator.make_initializer(training_batches)
val_iterator = train_val_iterator.make_initializer(val_batches)
test_iterator = test_dataset.make_one_shot_iterator()
def main(argv):
# extract the activation function from the hyperopt spec as an attribute from the tf.nn module
#activation = getattr(tf.nn, FLAGS.activation_function)
# define the graph
#with tf.Graph().as_default():
# Building the Bayesian Neural Network
# we are Gaussian Reparametrization Trick
# to compute the stochastic gradients as described in the paper
with tf.compat.v1.name_scope("bayesian_neural_net", values =[features_final]):
neural_net = tf.keras.Sequential()
for i in range(FLAGS.num_hidden_layers):
layer = tfp.layers.DenseReparameterization(
units = 10,
activation = tf.nn.relu,
trainable = True,
kernel_prior_fn=tfp.layers.default_multivariate_normal_fn, # NormalDiag
kernel_posterior_fn=tfp.layers.default_mean_field_normal_fn(),
#kernel_posterior_fn=tfp_layers_util.default_mean_field_normal_fn(), # softplus(sigma)
kernel_posterior_tensor_fn=lambda x: x.sample(),
bias_prior_fn=tfp.layers.default_multivariate_normal_fn, # NormalDiag
bias_posterior_fn=tfp.layers.default_mean_field_normal_fn(), # softplus(sigma)
bias_posterior_tensor_fn=lambda x: x.sample()
)
neural_net.add(layer)
neural_net.add(tfp.layers.DenseReparameterization(
units=2, # one dimensional output
activation= tf.nn.softmax, # since regression (outcome not bounded)
trainable=True, # i.e subject to optimization
kernel_prior_fn=tfp.layers.default_multivariate_normal_fn, # NormalDiag with hyperopt sigma
kernel_posterior_fn=tfp.layers.default_mean_field_normal_fn(), # softplus(sigma)
kernel_posterior_tensor_fn=lambda x: x.sample(),
bias_prior_fn =tfp.layers.default_multivariate_normal_fn, # NormalDiag with hyperopt sigma
bias_posterior_fn=tfp.layers.default_mean_field_normal_fn(), # softplus(sigma)
bias_posterior_tensor_fn=lambda x: x.sample()
))
logits = neural_net(features_final)
#labels_distribution = tfd.Bernoulli(logits=logits)
labels_distribution = tfd.Categorical(logits=logits)
#labels_distribution = tfd.Bernoulli(logits=logits)
# Perform KL annealing. The optimal number of annealing steps
# depends on the dataset and architecture.
t = tf.Variable(0.0)
kl_regularizer = t / (FLAGS.kl_annealing * len(X_train) / FLAGS.batch_size)
#Compute the -ELBO as the loss. The kl term is annealed from 0 to 1 over
# the epochs specified by the kl_annealing flag.
log_likelihood = labels_distribution.log_prob(labels_final)
#neg_log_likelihood = tf.reduce_mean(tf.squared_difference(logits,labels_final))
neg_log_likelihood = -tf.reduce_mean(input_tensor = log_likelihood)
kl = sum(neural_net.losses)/len(X_train) * tf.minimum(1.0, kl_regularizer)
elbo_loss = neg_log_likelihood + kl
# Build metrics for evaluation. Predictions are formed from single forward
# pass of the probablisitic layers . They are cheap but noisy predictions
predictions = tf.argmax(input = logits, axis=1)
predictions = tf.cast(predictions, tf.float32)
# TP, TN, FP, FN
TP = tf.count_nonzero(predictions * labels_final)
TN = tf.count_nonzero((predictions - 1) * (labels_final - 1))
FP = tf.count_nonzero(predictions * (labels_final - 1))
FN = tf.count_nonzero((predictions - 1) * labels_final)
# precision, recall, f1
precision = TP / (TP + FP)
recall = TP / (TP + FN)
f1 = 2 * precision * recall / (precision + recall)
tpr = TP/(TP+FN)
fpr = FP/(TP+FN)
#create Reinitializable iterator for Train and Validation, one hot iterator for Test
train_val_iterator = tf.data.Iterator.from_structure(training_batches.output_types, training_batches.output_shapes)
training_iterator = train_val_iterator.make_initializer(training_batches)
val_iterator = train_val_iterator.make_initializer(val_batches)
test_iterator = test_dataset.make_one_shot_iterator()
with tf.compat.v1.name_scope("train"):
train_accuracy, train_accuracy_update_op = tf.metrics.accuracy(labels=labels_final,predictions =predictions)
opt = tf.train.AdamOptimizer(FLAGS.learning_rate)
train_op = opt.minimize(elbo_loss)
update_step_op = tf.assign(t, t+1)
with tf.compat.v1.name_scope("valid"):
valid_accuracy, validation_accuracy_update_op = tf.metrics.accuracy(labels= labels_final,predictions = predictions)
with tf.compat.v1.name_scope("test"):
test_accuracy, test_accuracy_update_op = tf.metrics.accuracy(labels = labels_final,predictions = predictions)
init_op = tf.group(tf.global_variables_initializer(),
tf.local_variables_initializer())
saver = tf.train.Saver()
stream_vars_valid = [ v for v in tf.local_variables() if "valid" in v.name]
reset_valid_op = tf.variables_initializer(stream_vars_valid)
valid_accuracy_summary = []
stop_early =0
with tf.compat.v1.Session() as sess:
sess.run(init_op)
# Run the training loop
train_val_string, test_string = sess.run([
train_val_iterator.string_handle(),
test_iterator.string_handle()])
training_steps = int(round(FLAGS.epochs * (len(X_train) / FLAGS.batch_size)))
for step in range(training_steps):
#start reininitializable's train iterator
sess.run(training_iterator, feed_dict = {placeholder_X:X_train, placeholder_y:y_train})
#
_ = sess.run([train_op,train_accuracy_update_op, update_step_op],feed_dict={handle: train_val_string})
# Manually print the frequency
if step % 100 == 0:
save_path = saver.save(sess, "/tmp/my_model.ckpt")
loss_value, accuracy_value, kl_value = sess.run([elbo_loss, train_accuracy, kl], feed_dict= {handle: train_val_string})
print("Step:{:>3d} loss : {:.3f} KL: {:.3f}" .format(step , loss_value, accuracy_value, kl_value))
if (step +1) % FLAGS.eval_freq ==0:
# Compute log prob of heldout set by averaging draws from the model:
# p(heldout | train) = int_model p(heldout|model) p(model|train) ~= 1/n * sum_{i=1}^n p(heldout | model_i)
# where model_i is a draw from the posterior
#p(model|train)
probs = np.asarray([sess.run((labels_distribution.probs),
feed_dict ={handle: train_val_string})
for _ in range(FLAGS.num_monte_carlo)])
mean_probs = np.mean(probs, axis =0).astype(np.int32)
print(mean_probs.dtype)
_, label_vals = sess.run((features_final, labels_final), feed_dict = {handle: train_val_string})
label_vals = (label_vals).astype(np.int32)
heldout_lp = np.mean(np.log(mean_probs[np.arange(mean_probs.shape[0]), label_vals]))
print(" ...Held_out nats: {:.3f}".format(heldout_lp))
# Calculate validation accuracy
for step in range(10):
#start reinitializable's validation iterator
sess.run(val_iterator, feed_dict = {placeholder_X:X_val, placeholder_y:y_val})
sess.run(validation_accuracy_update_op, feed_dict={handle:train_val_string})
valid_value = sess.run(valid_accuracy, feed_dict={handle:train_val_string})
valid_accuracy_summary.append(valid_value)
if valid_value < max(valid_accuracy_summary) and step > 100:
stop_early += 1
if stop_early == 40:
break
else:
stop_early = 0
print("Validation Accuracy: {:.3f}".format(valid_value))
sess.run(reset_valid_op)
#Feed to r=feedable iterator the string handle
test_value, precision_value, recall_value, fpr_value, tpr_value,f1 = sess.run([test_accuracy, precision, recall, fpr, tpr,f1],feed_dict={handle: test_string})
print("Step: {:>3d} test Accuracy: {:.3f} Precision: {:.3f} Recall: {:.3f} ".format(step, test_value, precision_value, recall_value))
print("Step: {:>3d} fpr: {:.3f} tpr: {:.3f} f1_1: {:.3f}".format( step, fpr_value, tpr_value,f1))
if __name__ == "__main__":
tf.compat.v1.app.run()
I expect the output to progress, but it gives this error:
Step: 0 loss : 0.646 KL: 0.875
Step:100 loss : 0.654 KL: 0.904
Step:200 loss : 0.657 KL: 0.906
Step:300 loss : 0.648 KL: 0.906
int32
/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:137: RuntimeWarning: divide by zero encountered in log
...Held_out nats: -inf
Validation Accuracy: 0.914
Step: 9 test Accuracy: 0.000 Precision: 0.910 Recall: 1.000
Step: 9 fpr: 0.099 tpr: 1.000 f1_1: 0.953
Step:400 loss : 0.624 KL: 0.906
Step:500 loss : 0.641 KL: 0.906
Step:600 loss : 0.612 KL: 0.906
Step:700 loss : 0.579 KL: 0.906
int32
...Held_out nats: -inf
Validation Accuracy: 0.914
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py in __init__(self, fetches, contraction_fn)
302 self._unique_fetches.append(ops.get_default_graph().as_graph_element(
--> 303 fetch, allow_tensor=True, allow_operation=True))
304 except TypeError as e:
14 frames
TypeError: Can not convert a float64 into a Tensor or Operation.
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py in __init__(self, fetches, contraction_fn)
305 raise TypeError('Fetch argument %r has invalid type %r, '
306 'must be a string or Tensor. (%s)' %
--> 307 (fetch, type(fetch), str(e)))
308 except ValueError as e:
309 raise ValueError('Fetch argument %r cannot be interpreted as a '
The exception arises because you reuse the name f1 on the left-hand side of the assignment: after the first sess.run, f1 is rebound to a plain float64 instead of the f1 tensor, so the next sess.run cannot interpret it as a fetch. We need to change the name f1 on the left side.
test_value, precision_value, recall_value, fpr_value, tpr_value,f1 = sess.run([test_accuracy, precision, recall, fpr, tpr,f1],feed_dict={handle: test_string})
change the line to
test_value, precision_value, recall_value, fpr_value, tpr_value,f1_value = sess.run([test_accuracy, precision, recall, fpr, tpr,f1],feed_dict={handle: test_string})
Hopefully, this will work.
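A minimal standalone reproduction of the same pitfall (not from the question's code, just to show the rebinding):
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

precision = tf.constant(0.9, dtype=tf.float64)
recall = tf.constant(1.0, dtype=tf.float64)
f1 = 2 * precision * recall / (precision + recall)   # f1 is a Tensor here

with tf.Session() as sess:
    f1 = sess.run(f1)   # works once, but rebinds f1 to a numpy float64
    f1 = sess.run(f1)   # TypeError: Can not convert a float64 into a Tensor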
I'm new to TensorFlow and I am following the example here, but I have one question.
The code is as follows:
import numpy as np
import tensorflow as tf
from time import time
import math
from include.data import get_data_set
from include.model import model, lr
train_x, train_y = get_data_set("train")
test_x, test_y = get_data_set("test")
x, y, output, y_pred_cls, global_step, learning_rate = model()
global_accuracy = 0
# PARAMS
_BATCH_SIZE = 128
_EPOCH = 60
_SAVE_PATH = "./tensorboard/cifar-10-v1.0.0/"
# LOSS AND OPTIMIZER
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=output, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
beta1=0.9,
beta2=0.999,
epsilon=1e-08).minimize(loss, global_step=global_step)
# PREDICTION AND ACCURACY CALCULATION
correct_prediction = tf.equal(y_pred_cls, tf.argmax(y, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# SAVER
merged = tf.summary.merge_all()
saver = tf.train.Saver()
sess = tf.Session()
train_writer = tf.summary.FileWriter(_SAVE_PATH, sess.graph)
try:
print("\nTrying to restore last checkpoint ...")
last_chk_path = tf.train.latest_checkpoint(checkpoint_dir=_SAVE_PATH)
saver.restore(sess, save_path=last_chk_path)
print("Restored checkpoint from:", last_chk_path)
except ValueError:
print("\nFailed to restore checkpoint. Initializing variables instead.")
sess.run(tf.global_variables_initializer())
def train(epoch):
batch_size = int(math.ceil(len(train_x) / _BATCH_SIZE))
i_global = 0
for s in range(batch_size):
batch_xs = train_x[s*_BATCH_SIZE: (s+1)*_BATCH_SIZE]
batch_ys = train_y[s*_BATCH_SIZE: (s+1)*_BATCH_SIZE]
start_time = time()
i_global, _, batch_loss, batch_acc = sess.run(
[global_step, optimizer, loss, accuracy],
feed_dict={x: batch_xs, y: batch_ys, learning_rate: lr(epoch)})
duration = time() - start_time
if s % 10 == 0:
percentage = int(round((s/batch_size)*100))
bar_len = 29
filled_len = int((bar_len*int(percentage))/100)
bar = '=' * filled_len + '>' + '-' * (bar_len - filled_len)
msg = "Global step: {:>5} - [{}] {:>3}% - acc: {:.4f} - loss: {:.4f} - {:.1f} sample/sec"
print(msg.format(i_global, bar, percentage, batch_acc, batch_loss, _BATCH_SIZE / duration))
test_and_save(i_global, epoch)
def test_and_save(_global_step, epoch):
global global_accuracy
i = 0
predicted_class = np.zeros(shape=len(test_x), dtype=np.int)
while i < len(test_x):
j = min(i + _BATCH_SIZE, len(test_x))
batch_xs = test_x[i:j, :]
batch_ys = test_y[i:j, :]
predicted_class[i:j] = sess.run(
y_pred_cls,
feed_dict={x: batch_xs, y: batch_ys, learning_rate: lr(epoch)}
)
i = j
correct = (np.argmax(test_y, axis=1) == predicted_class)
acc = correct.mean()*100
correct_numbers = correct.sum()
mes = "\nEpoch {} - accuracy: {:.2f}% ({}/{})"
print(mes.format((epoch+1), acc, correct_numbers, len(test_x)))
if global_accuracy != 0 and global_accuracy < acc:
summary = tf.Summary(value=[
tf.Summary.Value(tag="Accuracy/test", simple_value=acc),
])
train_writer.add_summary(summary, _global_step)
saver.save(sess, save_path=_SAVE_PATH, global_step=_global_step)
mes = "This epoch receive better accuracy: {:.2f} > {:.2f}. Saving session..."
print(mes.format(acc, global_accuracy))
global_accuracy = acc
elif global_accuracy == 0:
global_accuracy = acc
print("###########################################################################################################")
def main():
for i in range(_EPOCH):
print("\nEpoch: {0}/{1}\n".format((i+1), _EPOCH))
train(i)
if __name__ == "__main__":
main()
sess.close()
In this example, I think both the test and the training data are fed to the network, whereas normally only the training data should be fed during training. I cannot see any difference between the train() and test_and_save() functions. Am I wrong? Thanks.
Here is an explanation, if I understood your question correctly. The train function is called every epoch and iterates through the training data. At the end of the epoch, the test_and_save function is called, where the model accuracy is evaluated: it runs the test data through the learned weights, calculates the accuracy, and saves the model. This is repeated _EPOCH times.
Edit: The model is saved in the test_and_save function. However, the weights are only updated (gradients calculated) when the optimizer op is passed to sess.run() in the train function. In test_and_save, the test data is fed to the network, but only y_pred_cls is evaluated by sess.run(), so the test data never influences the weights.
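To make that concrete, here are the two sess.run calls from the code above, annotated (same names as in the script):
# train(): fetching `optimizer` runs the AdamOptimizer.minimize op,
# so gradients are computed and the weights are updated
i_global, _, batch_loss, batch_acc = sess.run(
    [global_step, optimizer, loss, accuracy],
    feed_dict={x: batch_xs, y: batch_ys, learning_rate: lr(epoch)})

# test_and_save(): only the prediction op is fetched; no optimizer is run,
# so the test data never changes the weights
predicted_class[i:j] = sess.run(
    y_pred_cls,
    feed_dict={x: batch_xs, y: batch_ys, learning_rate: lr(epoch)})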