I am training a reinforcement learning model using the CartPole environment from OpenAI Gym. Although .h5 files for my weights and model appear in the target directory, I get None after running the following code - tf.train.get_checkpoint_state("C:/Users/dgt/Documents").
Here is my entire code -
## Slightly modified from the following repository - https://github.com/gsurma/cartpole

from __future__ import absolute_import, division, print_function, unicode_literals
import os
import random
import gym
import numpy as np
import tensorflow as tf
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint

ENV_NAME = "CartPole-v1"

GAMMA = 0.95
LEARNING_RATE = 0.001

MEMORY_SIZE = 1000000
BATCH_SIZE = 20

EXPLORATION_MAX = 1.0
EXPLORATION_MIN = 0.01
EXPLORATION_DECAY = 0.995

checkpoint_path = "training_1/cp.ckpt"

class DQNSolver:

    def __init__(self, observation_space, action_space):
        # save_dir = args.save_dir
        # self.save_dir = save_dir
        # if not os.path.exists(save_dir):
        #     os.makedirs(save_dir)
        self.exploration_rate = EXPLORATION_MAX

        self.action_space = action_space
        self.memory = deque(maxlen=MEMORY_SIZE)

        self.model = Sequential()
        self.model.add(Dense(24, input_shape=(observation_space,), activation="relu"))
        self.model.add(Dense(24, activation="relu"))
        self.model.add(Dense(self.action_space, activation="linear"))
        self.model.compile(loss="mse", optimizer=Adam(lr=LEARNING_RATE))

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() < self.exploration_rate:
            return random.randrange(self.action_space)
        q_values = self.model.predict(state)
        return np.argmax(q_values[0])

    def experience_replay(self):
        if len(self.memory) < BATCH_SIZE:
            return
        batch = random.sample(self.memory, BATCH_SIZE)
        for state, action, reward, state_next, terminal in batch:
            q_update = reward
            if not terminal:
                q_update = (reward + GAMMA * np.amax(self.model.predict(state_next)[0]))
            q_values = self.model.predict(state)
            q_values[0][action] = q_update
            self.model.fit(state, q_values, verbose=0)
        self.exploration_rate *= EXPLORATION_DECAY
        self.exploration_rate = max(EXPLORATION_MIN, self.exploration_rate)

def cartpole():
    env = gym.make(ENV_NAME)
    #score_logger = ScoreLogger(ENV_NAME)
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    dqn_solver = DQNSolver(observation_space, action_space)
    checkpoint = tf.train.get_checkpoint_state("C:/Users/dgt/Documents")
    print('checkpoint:', checkpoint)
    if checkpoint and checkpoint.model_checkpoint_path:
        dqn_solver.model = keras.models.load_model('cartpole.h5')
        dqn_solver.model = model.load_weights('cartpole_weights.h5')
    run = 0
    i = 0
    while i < 2:
        i = i + 1
        #total = 0
        run += 1
        state = env.reset()
        state = np.reshape(state, [1, observation_space])
        step = 0
        while True:
            step += 1
            #env.render()
            action = dqn_solver.act(state)
            state_next, reward, terminal, info = env.step(action)
            #total += reward
            reward = reward if not terminal else -reward
            state_next = np.reshape(state_next, [1, observation_space])
            dqn_solver.remember(state, action, reward, state_next, terminal)
            state = state_next
            dqn_solver.model.save('cartpole.h5')
            dqn_solver.model.save_weights('cartpole_weights.h5')
            if terminal:
                print("Run: " + str(run) + ", exploration: " + str(dqn_solver.exploration_rate) + ", score: " + str(step))
                #score_logger.add_score(step, run)
                break
            dqn_solver.experience_replay()

if __name__ == "__main__":
    cartpole()
Both the cartpole_weights.h5 and cartpole.h5 files appear in my target directory. However, I believe another file called 'checkpoint' should also appear, and my understanding is that its absence is the reason my code does not run.
First, the code won't run if you don't already have the weights/model saved, so I commented out the lines below and ran the script once to generate the files for the first time.
checkpoint = tf.train.get_checkpoint_state(".")
print('checkpoint:', checkpoint)
if checkpoint and checkpoint.model_checkpoint_path:
    dqn_solver.model = tf.keras.models.load_model('cartpole.h5')
    dqn_solver.model.load_weights('cartpole_weights.h5')
Note that I also modified the above code - there were a couple of errors in it before. In particular, this line in your post

dqn_solver.model = model.load_weights('cartpole_weights.h5')

is probably what was causing the problem, because the model.load_weights('file') method mutates model in place (as opposed to returning the model).
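To make the distinction concrete, here is a minimal sketch (the names are illustrative, not from your script):

model = tf.keras.models.load_model('cartpole.h5')  # returns a new Model object
model.load_weights('cartpole_weights.h5')          # modifies `model` in place; the return value is not the model
# model = model.load_weights('cartpole_weights.h5')  # would throw the model away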
I then tested that the model weights were being saved and loaded correctly. To do this, you can run
dqn_solver = DQNSolver(observation_space, action_space)
dqn_solver.model.trainable_variables
to see the (randomly initialized) weights from when the model is first created. Then you can load the weights with either
dqn_solver.model = tf.keras.models.load_model('cartpole.h5')
or
dqn_solver.model.load_weights('cartpole_weights.h5')
and then you can view trainable_variables again to make sure they differ from the initial random weights, and that the two loading methods give equivalent results.
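For example, a quick check along these lines (a sketch, assuming the script above has already written cartpole_weights.h5 and that you are running under TF2's default eager execution):

import numpy as np

fresh = DQNSolver(observation_space, action_space)
before = [v.numpy() for v in fresh.model.trainable_variables]
fresh.model.load_weights('cartpole_weights.h5')
after = [v.numpy() for v in fresh.model.trainable_variables]
# If loading worked, at least one weight tensor should have changed.
print(any(not np.allclose(b, a) for b, a in zip(before, after)))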
When you save a model, it saves the full architecture - the exact configuration of layers - together with the weights. When you save just the weights, it saves only the list of tensors that you can see with trainable_variables.
Note that when you load_weights, the weights need to be loaded into the exact architecture they were saved from, otherwise it won't work correctly. So if you changed the model architecture in DQNSolver and then tried to load_weights saved for the old model, it's not going to work right. If you load_model instead, it rebuilds the model with exactly the saved architecture and also sets the weights.
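As for the missing 'checkpoint' file: tf.train.get_checkpoint_state only looks for the small 'checkpoint' index file that TensorFlow-format savers (tf.train.Saver, tf.train.Checkpoint) write next to their checkpoint data. Keras model.save('cartpole.h5') writes an HDF5 file and never creates that index, so get_checkpoint_state returns None no matter where you point it. If you stick with .h5 files, a simpler guard (a sketch, assuming the files sit in the working directory) is to test for the file itself:

import os

if os.path.exists('cartpole.h5'):
    dqn_solver.model = tf.keras.models.load_model('cartpole.h5')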
edit - entire modified script
## Slightly modified from the following repository - https://github.com/gsurma/cartpole

from __future__ import absolute_import, division, print_function, unicode_literals
import os
import random
import gym
import numpy as np
import tensorflow as tf
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint

ENV_NAME = "CartPole-v1"

GAMMA = 0.95
LEARNING_RATE = 0.001

MEMORY_SIZE = 1000000
BATCH_SIZE = 20

EXPLORATION_MAX = 1.0
EXPLORATION_MIN = 0.01
EXPLORATION_DECAY = 0.995

checkpoint_path = "training_1/cp.ckpt"

class DQNSolver:

    def __init__(self, observation_space, action_space):
        # save_dir = args.save_dir
        # self.save_dir = save_dir
        # if not os.path.exists(save_dir):
        #     os.makedirs(save_dir)
        self.exploration_rate = EXPLORATION_MAX

        self.action_space = action_space
        self.memory = deque(maxlen=MEMORY_SIZE)

        self.model = Sequential()
        self.model.add(Dense(24, input_shape=(observation_space,), activation="relu"))
        self.model.add(Dense(24, activation="relu"))
        self.model.add(Dense(self.action_space, activation="linear"))
        self.model.compile(loss="mse", optimizer=Adam(lr=LEARNING_RATE))

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() < self.exploration_rate:
            return random.randrange(self.action_space)
        q_values = self.model.predict(state)
        return np.argmax(q_values[0])

    def experience_replay(self):
        if len(self.memory) < BATCH_SIZE:
            return
        batch = random.sample(self.memory, BATCH_SIZE)
        for state, action, reward, state_next, terminal in batch:
            q_update = reward
            if not terminal:
                q_update = (reward + GAMMA * np.amax(self.model.predict(state_next)[0]))
            q_values = self.model.predict(state)
            q_values[0][action] = q_update
            self.model.fit(state, q_values, verbose=0)
        self.exploration_rate *= EXPLORATION_DECAY
        self.exploration_rate = max(EXPLORATION_MIN, self.exploration_rate)

def cartpole():
    env = gym.make(ENV_NAME)
    #score_logger = ScoreLogger(ENV_NAME)
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    dqn_solver = DQNSolver(observation_space, action_space)
    # checkpoint = tf.train.get_checkpoint_state(".")
    # print('checkpoint:', checkpoint)
    # if checkpoint and checkpoint.model_checkpoint_path:
    #     dqn_solver.model = tf.keras.models.load_model('cartpole.h5')
    #     dqn_solver.model.load_weights('cartpole_weights.h5')
    run = 0
    i = 0
    while i < 2:
        i = i + 1
        #total = 0
        run += 1
        state = env.reset()
        state = np.reshape(state, [1, observation_space])
        step = 0
        while True:
            step += 1
            #env.render()
            action = dqn_solver.act(state)
            state_next, reward, terminal, info = env.step(action)
            #total += reward
            reward = reward if not terminal else -reward
            state_next = np.reshape(state_next, [1, observation_space])
            dqn_solver.remember(state, action, reward, state_next, terminal)
            state = state_next
            dqn_solver.model.save('cartpole.h5')
            dqn_solver.model.save_weights('cartpole_weights.h5')
            if terminal:
                print("Run: " + str(run) + ", exploration: " + str(dqn_solver.exploration_rate) + ", score: " + str(step))
                #score_logger.add_score(step, run)
                break
            dqn_solver.experience_replay()

if __name__ == "__main__":
    cartpole()

#%% to load saved results
env = gym.make(ENV_NAME)
#score_logger = ScoreLogger(ENV_NAME)
observation_space = env.observation_space.shape[0]
action_space = env.action_space.n
dqn_solver = DQNSolver(observation_space, action_space)
dqn_solver.model = tf.keras.models.load_model('cartpole.h5')  # or
dqn_solver.model.load_weights('cartpole_weights.h5')
Related
I got an error when running the following code while performing multi-label text classification. The code is from GitHub, at https://github.com/hellonlp/classifier_multi_label_seq2seq_attention.
The code that produces the error is as follows:
# -*- coding: utf-8 -*-
import os
# os.environ["CUDA_VISIBLE_DEVICES"] = '-1'
import numpy as np
import tensorflow as tf
from classifier_multi_label_seq2seq_attention.networks import NetworkAlbertSeq2Seq
from classifier_multi_label_seq2seq_attention.classifier_utils import get_features
from classifier_multi_label_seq2seq_attention.hyperparameters import Hyperparamters as hp
from classifier_multi_label_seq2seq_attention.utils import select, shuffle_one, time_now_string
from classifier_multi_label_seq2seq_attention.load import normalization_label

pwd = os.path.dirname(os.path.abspath(__file__))

MODEL = NetworkAlbertSeq2Seq(is_training=True)

# Get data features
input_ids, input_masks, segment_ids, label_ids = get_features()
num_train_samples = len(input_ids)
arr = np.arange(num_train_samples)
num_batchs = int((num_train_samples - 1) / hp.batch_size) + 1
print('Number of batch:', num_batchs)

# Set up the graph
tf.reset_default_graph()
saver = tf.train.Saver(max_to_keep=hp.max_to_keep, defer_build=True)
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Load model saved before
MODEL_SAVE_PATH = '/home/classifier_multi_label_seq2seq_attention/model/CML_Seq2Seq'
ckpt = tf.train.get_checkpoint_state(MODEL_SAVE_PATH)
if ckpt and ckpt.model_checkpoint_path:
    saver.restore(sess, ckpt.model_checkpoint_path)
    print('Restored model!')

with sess.as_default():
    # Tensorboard writer
    writer = tf.summary.FileWriter(hp.logdir, sess.graph)
    for i in range(hp.num_train_epochs):
        indexs = shuffle_one(arr)
        for j in range(num_batchs - 1):
            i1 = indexs[j * hp.batch_size:min((j + 1) * hp.batch_size, num_train_samples)]
            # Get features
            input_id_ = select(input_ids, i1)
            input_mask_ = select(input_masks, i1)
            segment_id_ = select(segment_ids, i1)
            label_id_ = normalization_label(select(label_ids, i1))
            # Feed dict
            fd = {MODEL.input_ids: input_id_,
                  MODEL.input_masks: input_mask_,
                  MODEL.segment_ids: segment_id_,
                  MODEL.label_ids: label_id_}
            # Optimizer
            sess.run(MODEL.optimizer, feed_dict=fd)
            # Tensorboard
            if j % hp.summary_step == 0:
                summary, global_step = sess.run([MODEL.merged, MODEL.global_step], feed_dict=fd)
                writer.add_summary(summary, global_step)
            # Save Model
            if j % (num_batchs // hp.num_saved_per_epoch) == 0:
                if not os.path.exists(os.path.join(pwd, hp.file_save_model)):
                    os.makedirs(os.path.join(pwd, hp.file_save_model))
                saver.save(sess, os.path.join(pwd, hp.file_save_model, 'model' + '_%s_%s.ckpt' % (str(i), str(j))))
            # Log
            if j % hp.print_step == 0:
                fd = {MODEL.input_ids: input_id_,
                      MODEL.input_masks: input_mask_,
                      MODEL.segment_ids: segment_id_,
                      MODEL.label_ids: label_id_}
                loss = sess.run(MODEL.loss, feed_dict=fd)
                print('Time:%s, Epoch:%s, Batch number:%s/%s, Loss:%s' % (
                    time_now_string(), str(i), str(j), str(num_batchs), str(loss)))
print('Train finished')
I am running the following code once on CPU (without the "disable_eager_execution" part) and once more on GPU.
On CPU, one epoch takes 12 hours, but the loss changes from batch to batch and I can see that training takes place.
On the GPU version, nothing happens: training one epoch takes around 1 hour, but the loss and accuracy stay the same.
Please help me understand what I am doing wrong.
I am running this code on AWS SageMaker (ml.g4dn.4xlarge).
Code:
import numpy as np
import pandas as pd
import os
import datetime
import tensorflow as tf
import re
from sklearn.metrics.pairwise import cosine_similarity
from tensorflow.python.framework.ops import disable_eager_execution

print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')), '\n')
disable_eager_execution()

# Read data
# read dictionaries:
# company_dict:
company_df = pd.read_csv("/home/ec2-user/SageMaker/company_similarity/data/company_dict.csv", sep='\t', header=None)
company_df.columns = ['company_id', 'idx']
# payee dict
payee_df = pd.read_csv("/home/ec2-user/SageMaker/company_similarity/data/cleaned_up_payee_dict.csv", sep='\t', header=None)
payee_df.columns = ['payee', 'idx']

# Read raw data
BATCH_SIZE = 32
raw_data = tf.data.experimental.make_csv_dataset(
    "/home/ec2-user/SageMaker/company_similarity/data/training_data.csv",
    column_names=['company_id', 'payee', 'label'],
    select_columns=['company_id', 'payee', 'label'],
    field_delim='\t',
    column_defaults=[tf.string, tf.string, tf.int32],
    batch_size=BATCH_SIZE,
    label_name='label',
    na_value="?",
    num_epochs=1,
    ignore_errors=True,
)

class PreprocessingFeatures(object):
    def __init__(self, company_idx, payee_idx):
        self.payee_idx = payee_idx
        self.company_idx = company_idx
        self.symbols = '!"$%&\'\?()*+,-./:;<=>?[\\]^_`{|}~a-zA-Z0-9 '
        self.payee_lookup = tf.keras.layers.experimental.preprocessing.StringLookup(vocabulary=payee_idx,
                                                                                    mask_token=None,
                                                                                    num_oov_indices=1)
        self.company_lookup = tf.keras.layers.experimental.preprocessing.StringLookup(vocabulary=company_idx,
                                                                                      mask_token=None,
                                                                                      num_oov_indices=1)

    def __call__(self, features, labels):
        payee = self.payee_lookup(features['payee'])
        company = self.company_lookup(features['company_id'])
        return (company, payee), labels

payee_list = list(payee_df['payee'])
company_list = [str(si) for si in list(company_df['company_id'])]

# ************ START TRAINING ************ #
log_dir = '/home/ec2-user/SageMaker/company_similarity/models/logs/fit/' + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
checkpoint_dir = '/home/ec2-user/SageMaker/company_similarity/models/embedding_checkpoints'
file_writer = tf.summary.create_file_writer(log_dir + "/metrics")
file_writer.set_as_default()

def present_topK(model, listA, item_title='eBay', topK=10):
    '''
    Show the topK most similar items to item_title using the model's embedding.
    :param model: the actual model
    :param listA: vocabulary list the embedding was built from
    :param item_title: title text
    :param topK: number of similar items to return
    :return: table to print (string)
    '''
    assert item_title in listA, "Item not in Vocabulary"
    emb = model.layers[2].get_weights()[0]
    # we started from 1, not zero, in the dictionary
    score = cosine_similarity(emb[listA.index(item_title)+1].reshape(1, -1), emb)[0]
    similar_items = np.argsort(score)[::-1][:topK]
    res = {'payee': [], 'score': []}
    for i in similar_items:
        res['payee'] += [listA[i-1]]
        res['score'] += [score[i]]
    return "\n".join("{}\t{}".format(k, v) for k, v in res.items())

class GenerateExamplesCallback(tf.keras.callbacks.Callback):
    def __init__(self):
        self.step = 0

    def on_epoch_end(self, epoch, logs=None):
        self.step += 1
        self.model.save('/home/ec2-user/SageMaker/company_similarity/models/embedding_checkpoints/model_{}'.format(epoch))
        sim_table = present_topK(self.model, payee_list)
        print("\nSimilar Items to 'eBay': ", sim_table)
        with file_writer.as_default():
            tf.summary.text('Similarity sanity check', data=tf.convert_to_tensor(sim_table), step=epoch)

    def on_batch_end(self, batch, logs=None):
        if batch % 1000 == 0:
            sim_table = present_topK(self.model, payee_list)
            print("\nSimilar Items to 'eBay': ", sim_table)
            with file_writer.as_default():
                tf.summary.text('Similarity sanity check', data=tf.convert_to_tensor(sim_table), step=batch)

print('TensorBoard logging folder: ', log_dir)
print("checkpoint_dir:", checkpoint_dir)

checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_prefix, save_weights_only=True, save_freq=200000)
early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=2)
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1, update_freq=10000)
callbacks = [checkpoint_callback, tensorboard_callback, GenerateExamplesCallback()]

# read the data
# train_data = raw_data.map(PreprocessingFeatures(company_list, payee_list)).shuffle(buffer_size=10000).repeat()
train_data = raw_data.map(PreprocessingFeatures(company_list, payee_list)).repeat()
# examples
# next(iter(raw_data.take(1)))
# next(iter(train_data))

# wc -l <filename> on terminal
fileLen = 5851184
STEPS_PER_EPOCH = (fileLen // BATCH_SIZE) + 1
# STEPS_PER_EPOCH = 1000

def build_model(company_embedding=128, payee_embedding=128, loss=tf.keras.losses.binary_crossentropy):
    company_input = tf.keras.layers.Input(name='company_input', shape=(1,))
    payee_input = tf.keras.layers.Input(name='payee_input', shape=(1,))
    company_emb = tf.keras.layers.Embedding(name='company_embedding',
                                            input_dim=len(company_list)+1,
                                            output_dim=company_embedding)(company_input)
    company_emb = tf.keras.layers.Flatten()(company_emb)
    payee_emb = tf.keras.layers.Embedding(name='payee_embedding',
                                          input_dim=len(payee_list)+1,
                                          output_dim=payee_embedding)(payee_input)
    payee_emb = tf.keras.layers.Flatten()(payee_emb)
    merged = tf.keras.layers.Dot(name='dot', normalize=True, axes=1)([payee_emb, company_emb])
    merged = tf.keras.layers.Reshape(target_shape=[1])(merged)
    x = tf.keras.layers.Dense(1, activation='sigmoid')(merged)
    # x = tf.keras.layers.Concatenate()([item_emb, device_emb])
    # x = tf.keras.layers.Dense(128, activation='relu')(x)
    # x = tf.keras.layers.Dropout(0.5)(x)
    # x = tf.keras.layers.Dense(64, activation='relu')(x)
    # x = tf.keras.layers.Dropout(0.5)(x)
    # x = tf.keras.layers.Dense(32, activation='relu')(x)
    # # x = tf.keras.layers.BatchNormalization()(x)
    # x = tf.keras.layers.Dense(1, activation='sigmoid')(x)
    model = tf.keras.Model(inputs=(company_input, payee_input), outputs=x)
    model.compile(
        loss=loss,
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model

model = build_model()
EPOCHS = 5
r = model.fit(train_data,
              epochs=EPOCHS,
              steps_per_epoch=STEPS_PER_EPOCH,
              callbacks=callbacks)
model.save("/home/ec2-user/SageMaker/company_similarity/models/models/embedding_model_final.h5")
print("Training is completed")
I've recently been learning deep reinforcement learning and I wanted to apply what I learned to a problem from Gym using Keras.
During training I realized that it is too slow; after checking the reason, I saw that the "fit" function takes most of the time.
Running each episode takes 3-4 minutes.
Is there something wrong with what I'm doing? Can you suggest an improvement?
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.optimizers import Adam
from collections import deque
import random
import gym
import datetime

class DQN():
    def __init__(self, env):
        self.env = env
        self.memory = deque(maxlen=2000)
        self.gamma = 0.98
        self.epsilon = 1
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.998
        self.learning_rate = 0.001
        self.model = self.create_model()
        self.target_model = self.create_model()

    def create_model(self):
        model = keras.Sequential()
        state_shape = self.env.observation_space.shape
        model.add(keras.layers.Dense(48, activation="relu", input_dim=state_shape[0]))
        model.add(keras.layers.Dense(24, activation="relu"))
        model.add(keras.layers.Dense(self.env.action_space.n, activation="relu"))
        model.compile(loss="mse", optimizer=Adam(lr=self.learning_rate))
        return model

    def remember(self, state, action, reward, new_state, done):
        self.memory.append([state, action, reward, new_state, done])

    def replay(self):
        batch_size = 32
        if len(self.memory) < batch_size:
            return
        samples = random.sample(self.memory, batch_size)
        # states, actions, rewards, states_, dones = samples
        # targets = self.target_model.predict(states)
        # _states = [i for i in range(len(samples))]
        # targets = [[0 for j in range(self.env.action_space.n)] for i in range(len(samples))]
        _states = np.zeros((len(samples), 8))
        targets = np.zeros((len(samples), self.env.action_space.n))
        for i, sample in enumerate(samples):
            state, action, reward, new_state, done = sample
            _states[i] = state
            # target = self.target_model.predict(state)
            if done:
                targets[i][action] = reward
            else:
                Q_future = max(self.target_model.predict(new_state)[0])
                targets[i][action] = reward + Q_future*self.gamma
        self.model.fit(_states, targets, epochs=1, verbose=0)
        # for sample in samples:
        #     state, action, reward, new_state, done = sample
        #     target = self.target_model.predict(state)
        #     if done:
        #         target[0][action] = reward
        #     else:
        #         Q_future = max(self.target_model.predict(new_state)[0])
        #         target[0][action] = reward + Q_future*self.gamma
        #     start_time = datetime.datetime.now()
        #     self.model.fit(state, target, epochs=1, verbose=0)
        #     end_time = datetime.datetime.now()
        #     print("--fit--")
        #     print(end_time-start_time)

    def target_train(self):
        weights = self.model.get_weights()
        target_weights = self.target_model.get_weights()
        for i in range(len(target_weights)):
            target_weights[i] = weights[i]
        self.target_model.set_weights(target_weights)

    def act(self, state):
        self.epsilon *= self.epsilon_decay
        self.epsilon = max(self.epsilon_min, self.epsilon)
        if np.random.random() < self.epsilon:
            return self.env.action_space.sample()
        return np.argmax(self.model.predict(state)[0])

    def save_model(self, fn):
        self.model.save(fn)

    def act_eval(self, state):
        return np.argmax(self.model.predict(state)[0])

    def evaluation(self, n_eval=10):
        total_reward = 0
        for _ in range(n_eval):
            self.env.reset()
            cur_state = self.env.reset().reshape(1,8)
            done = False
            while not done:
                action = self.act_eval(cur_state)
                new_state, reward, done, _ = self.env.step(action)
                total_reward += reward
                cur_state = new_state.reshape(1,8)
        return total_reward / n_eval

def main():
    save_path = "policies/"
    env = gym.make("LunarLander-v2")
    trials = 2000
    trial_len = 500
    update_target_network = 500
    agent = DQN(env=env)
    for trial in range(trials):
        cur_state = env.reset().reshape(1,8)
        time_step_cntr = 0
        # check execution durations
        dur_replay = 0
        dur_step = 0
        dur_act = 0
        for step in range(trial_len):
            print("Trial {0}, step {1}".format(trial, step))
            action = agent.act(cur_state)
            new_state, reward, done, _ = env.step(action)
            new_state = new_state.reshape(1,8)
            agent.remember(cur_state, action, reward, new_state, done)
            # learn from experience
            agent.replay()
            # after "update_target_network" steps, update target network
            if time_step_cntr % update_target_network == 0:
                agent.target_train()
            time_step_cntr += 1
            cur_state = new_state
            if done:
                break
        # print("Duration replay {0}, duration act {1}, duration step {2}".format(dur_replay, dur_act, dur_step))
        # at each N steps, evaluate
        print("Evaluation over 10 episodes", agent.evaluation())
        print("Trial #{0} completed.".format(trial))
        # # print the progress
        # if trial % 100 == 0:
        #     print("Trial #{0} completed.".format(trial))
        # save the model
        # if trial % 20 == 0:
        agent.save_model(save_path + str(trial) + "__.model")
    agent.save_model(save_path + "_final" + "__.model")

if __name__ == "__main__":
    main()
Your problem is not the fit call itself but the loop in your replay() method, which calls predict once per sample instead of once per batch. In cases like this, try replacing Python loops with NumPy operations; that makes things much faster.
Replace your replay method with the following one and let me know if it works faster for you:
def replay(self):
    batch_size = 32
    if len(self.memory) >= batch_size:
        # Draw a sample
        samples = random.sample(self.memory, batch_size)
        # Prepare the batch
        state, action, reward, new_state, done = zip(*samples)
        next_state = np.concatenate(new_state)
        done = np.array(done)[:, None]
        state = np.concatenate(state)
        reward = np.array(reward)[:, None]
        q_future = self.target_model.predict(next_state)
        targets = reward + self.gamma*np.max(q_future, axis=1, keepdims=True)*(1-done)
        # Fit the model
        self.model.fit(state, targets, epochs=1, verbose=0)
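If you want to preserve the per-action target semantics of your original loop (only the taken action's Q-value gets a new target, all other outputs keep the model's current prediction), a vectorized sketch along the same lines (assuming the same attribute names as in the question, and states stored as (1, 8) arrays):

def replay(self):
    batch_size = 32
    if len(self.memory) < batch_size:
        return
    samples = random.sample(self.memory, batch_size)
    state, action, reward, new_state, done = zip(*samples)
    state = np.concatenate(state)
    new_state = np.concatenate(new_state)
    action = np.array(action)
    reward = np.array(reward, dtype=np.float32)
    done = np.array(done, dtype=np.float32)
    # One batched predict per network instead of one per sample.
    targets = self.model.predict(state)
    q_future = self.target_model.predict(new_state).max(axis=1)
    # Overwrite only the entries for the actions actually taken.
    targets[np.arange(batch_size), action] = reward + self.gamma * q_future * (1.0 - done)
    self.model.fit(state, targets, epochs=1, verbose=0)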
Dataset.py
import os
import random
from skimage import io
import cv2
from skimage.transform import resize
import numpy as np
import tensorflow as tf
import keras
import Augmentor

def iter_sequence_infinite(seq):
    """Iterate indefinitely over a Sequence.

    # Arguments
        seq: Sequence object

    # Returns
        Generator yielding batches.
    """
    while True:
        for item in seq:
            yield item

# data generator class
class DataGenerator(keras.utils.Sequence):
    def __init__(self, ids, imgs_dir, masks_dir, batch_size=10, img_size=128, n_classes=1, n_channels=3, shuffle=True):
        self.id_names = ids
        self.indexes = np.arange(len(self.id_names))
        self.imgs_dir = imgs_dir
        self.masks_dir = masks_dir
        self.batch_size = batch_size
        self.img_size = img_size
        self.n_classes = n_classes
        self.n_channels = n_channels
        self.shuffle = shuffle
        self.on_epoch_end()

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.id_names))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation__(self, id_name):
        'Generates data for one sample'  # X : (n_samples, *dim, n_channels)
        # Initialization
        img_path = os.path.join(self.imgs_dir, id_name)    # polyp segmentation/images/id_name.jpg
        mask_path = os.path.join(self.masks_dir, id_name)  # polyp segmentation/masks/id_name.jpg
        img = io.imread(img_path)
        mask = cv2.imread(mask_path)

        p = Augmentor.DataPipeline([[img, mask]])
        p.resize(probability=1.0, width=self.img_size, height=self.img_size)
        p.rotate_without_crop(probability=0.3, max_left_rotation=10, max_right_rotation=10)
        # p.random_distortion(probability=0.3, grid_height=10, grid_width=10, magnitude=1)
        p.shear(probability=0.3, max_shear_left=1, max_shear_right=1)
        # p.skew_tilt(probability=0.3, magnitude=0.1)
        p.flip_random(probability=0.3)
        sample_p = p.sample(1)
        sample_p = np.array(sample_p).squeeze()
        p_img = sample_p[0]
        p_mask = sample_p[1]
        augmented_mask = (p_mask // 255) * 255  # denoising

        q = Augmentor.DataPipeline([[p_img]])
        q.random_contrast(probability=0.3, min_factor=0.2, max_factor=1.0)    # low to high
        q.random_brightness(probability=0.3, min_factor=0.2, max_factor=1.0)  # dark to bright
        sample_q = q.sample(1)
        sample_q = np.array(sample_q).squeeze()
        image = sample_q
        mask = augmented_mask[::, ::, 0]
        """
        # reading the image from dataset
        ## Reading Image
        image = io.imread(img_path)  # reading image into the image variable
        image = resize(image, (self.img_size, self.img_size), anti_aliasing=True)  # resizing input image to 128 * 128
        mask = io.imread(mask_path, as_gray=True)  # mask image of same size with all zeros
        mask = resize(mask, (self.img_size, self.img_size), anti_aliasing=True)  # resizing mask to fit the 128 * 128 image
        mask = np.expand_dims(mask, axis=-1)
        """
        # image normalization
        image = image / 255.0
        mask = mask / 255.0
        return image, mask

    def __len__(self):
        "Denotes the number of batches per epoch"
        return int(np.floor(len(self.id_names) / self.batch_size))

    def __getitem__(self, index):  # index : batch no.
        # Generate indexes of the batch
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        batch_ids = [self.id_names[k] for k in indexes]
        imgs = list()
        masks = list()
        for id_name in batch_ids:
            img, mask = self.__data_generation__(id_name)
            imgs.append(img)
            masks.append(np.expand_dims(mask, -1))
        imgs = np.array(imgs)
        masks = np.array(masks)
        return imgs, masks  # return batch
train.py
import argparse
import logging
import os
import sys
from tqdm import tqdm  # progress bar
import numpy as np
import matplotlib.pyplot as plt
from keras import optimizers
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import segmentation_models as sm
from segmentation_models.utils import set_trainable
from dataset import DataGenerator, iter_sequence_infinite

def train_model(model, train_gen, valid_gen, epochs, save_cp=True):
    total_batch_count = 0
    train_img_num = len(train_gen.id_names)
    train_batch_num = len(train_gen)
    train_gen_out = iter_sequence_infinite(train_gen)

    valid_batch_num = len(valid_gen)
    valid_img_num = len(valid_gen.id_names)
    valid_gen_out = iter_sequence_infinite(valid_gen)

    for epoch in range(epochs):  # iterate over the epochs
        set_trainable(model)
        epoch_loss = 0  # loss in this epoch
        epoch_iou = 0
        count = 0
        with tqdm(total=train_img_num, desc=f'Epoch {epoch + 1}/{epochs}', position=0, leave=True, unit='img') as pbar:  # make progress bar
            for _ in range(train_batch_num):
                batch = next(train_gen_out)
                imgs = batch[0]
                true_masks = batch[1]
                loss, iou = model.train_on_batch(imgs, true_masks)  # loss value of this batch
                epoch_loss += loss
                epoch_iou += iou
                pbar.set_postfix(**{'Batch loss': loss, 'Batch IoU': iou})  # show the loss in the pbar
                pbar.update(imgs.shape[0])  # update progress
                count += 1
                total_batch_count += 1
        train_gen.on_epoch_end()
        print("Epoch : loss: {}, IoU : {}".format(epoch_loss/count, epoch_iou/count))

        # Do validation
        validation_model(model, valid_gen_out, valid_batch_num, valid_img_num)
        valid_gen.on_epoch_end()

        if save_cp:
            try:
                if not os.path.isdir(checkpoint_dir):
                    os.mkdir(checkpoint_dir)
                    logging.info('Created checkpoint directory')
                else:
                    pass
            except OSError:
                pass
            model.save_weights(os.path.join(checkpoint_dir, f'CP_epoch{epoch + 1}.h5'))
            logging.info(f'Checkpoint {epoch + 1} saved !')

def validation_model(model, valid_gen_out, valid_batch_num, valid_img_num):
    epoch_loss = 0  # loss in this epoch
    epoch_iou = 0
    count = 0
    with tqdm(total=valid_img_num, desc='Validation round', position=0, leave=True, unit='img') as pbar:  # make progress bar
        for _ in range(valid_batch_num):
            batch = next(valid_gen_out)
            imgs = batch[0]
            true_masks = batch[1]
            loss, iou = model.test_on_batch(imgs, true_masks)  # loss value of this batch
            epoch_loss += loss
            epoch_iou += iou
            pbar.set_postfix(**{'Batch, loss': loss, 'Batch IoU': iou})  # show the loss in the pbar
            pbar.update(imgs.shape[0])  # update progress
            count += 1
    print("Validation loss: {}, IoU: {}".format(epoch_loss / count, epoch_iou / count))
    pred_mask = model.predict(np.expand_dims(imgs[0], 0))
    plt.subplot(131)
    plt.imshow(imgs[0])
    plt.subplot(132)
    plt.imshow(true_masks[0].squeeze(), cmap="gray")
    plt.subplot(133)
    plt.imshow(pred_mask.squeeze(), cmap="gray")
    plt.show()
    print()

def get_args():
    parser = argparse.ArgumentParser(description='Train the UNet on images and target masks',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-e', '--epochs', metavar='E', type=int, default=50,
                        help='Number of epochs', dest='epochs')
    parser.add_argument('-b', '--batch_size', metavar='B', type=int, nargs='?', default=2,
                        help='Batch size', dest='batch_size')
    parser.add_argument('-l', '--learning-rate', metavar='LR', type=float, nargs='?', default=1e-5,
                        help='Learning rate', dest='lr')
    parser.add_argument('-bb', '--backbone', default='resnet50', metavar='FILE',
                        help="backbone name")
    parser.add_argument('-w', '--weight', dest='load', type=str, default=False,
                        help='Load model from a .h5 file')
    parser.add_argument('-s', '--resizing', dest='resizing', type=int, default=384,
                        help='Downscaling factor of the images')
    parser.add_argument('-v', '--validation', dest='val', type=float, default=20.0,
                        help='Percent of the data that is used as validation (0-100)')
    return parser.parse_args()

if __name__ == '__main__':
    img_dir = './data/train/imgs/'    # ./data/train/imgs/CVC_Original/'
    mask_dir = './data/train/masks/'  # ./data/train/masks/CVC_Ground Truth/'
    checkpoint_dir = './checkpoints'
    args = get_args()

    # train path
    train_ids = os.listdir(img_dir)
    # Validation Data Size
    n_val = int(len(train_ids) * args.val/100)  # size of validation set

    valid_ids = train_ids[:n_val]  # list of image ids used for validation of result 0 to 9
    train_ids = train_ids[n_val:]  # list of image ids used for training dataset
    # print(valid_ids, "\n\n")
    print("training_size: ", len(train_ids), "validation_size: ", len(valid_ids))

    train_gen = DataGenerator(train_ids, img_dir, mask_dir, img_size=args.resizing, batch_size=args.batch_size)
    valid_gen = DataGenerator(valid_ids, img_dir, mask_dir, img_size=args.resizing, batch_size=args.batch_size)
    print("total training batches: ", len(train_gen))
    print("total validaton batches: ", len(valid_gen))
    train_steps = len(train_ids) // args.batch_size
    valid_steps = len(valid_ids) // args.batch_size

    # define model
    model = sm.Unet(args.backbone, encoder_weights='imagenet')
    optimizer = optimizers.Adam(lr=args.lr, decay=1e-4)
    model.compile(
        optimizer=optimizer,  # "Adam",
        loss=sm.losses.bce_dice_loss,  # sm.losses.bce_jaccard_loss, # sm.losses.binary_crossentropy,
        metrics=[sm.metrics.iou_score],
    )
    # model.summary()

    callbacks = [
        EarlyStopping(patience=6, verbose=1),
        ReduceLROnPlateau(factor=0.1, patience=3, min_lr=1e-7, verbose=1),
        ModelCheckpoint('./weights.Epoch{epoch:02d}-Loss{loss:.3f}-VIou{val_iou_score:.3f}.h5', verbose=1,
                        monitor='val_accuracy', save_best_only=True, save_weights_only=True)
    ]

    train_model(model=model, train_gen=train_gen, valid_gen=valid_gen, epochs=args.epochs)
When I run this code, the early epochs progress well, but after about 20 epochs it hits a GPU out-of-memory error like the one below:
(0) Resource exhausted: OOM when allocating tensor with shape[2,64,96,96] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
[[{{node decoder_stage2b_bn/FusedBatchNorm}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
So I think it is caused by the data generation. This code generates batches in the following order:

In train.py, initialize the DataGenerator class, a keras.utils.Sequence implemented in Dataset.py:

train_gen = DataGenerator(train_ids, img_dir, mask_dir, img_size=args.resizing, batch_size=args.batch_size)
valid_gen = DataGenerator(valid_ids, img_dir, mask_dir, img_size=args.resizing, batch_size=args.batch_size)

At the start of the function train_model, convert the DataGenerator (a Sequence) into a generator using the function iter_sequence_infinite:

train_gen_out = iter_sequence_infinite(train_gen)
valid_gen_out = iter_sequence_infinite(valid_gen)

Then get each batch using the built-in next function:

batch = next(train_gen_out)

I thought there would be no memory problem with this setup, but it occurs anyway. What is the problem, and how can I solve it?
Thanks.
I've run into a really strange problem: my squared loss becomes negative. Here's my code.
#!/usr/bin/python
# -*- coding:utf8 -*-
from __future__ import print_function
from models.vgg16 import VGG16_fixed
from keras import backend as K
from keras.backend.tensorflow_backend import set_session
from scipy.misc import imsave
from models.generative_model_v2 import gen_model_v2
from scripts.image_process import *
from scripts.utils_func import *
from tensorflow.python import debug as tf_debug
import tensorflow as tf
import os
import time

# configure gpu usage
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.5
set_session(tf.Session(config=config))  # pass gpu setting to Keras

# set learning phase, or batch norm won't work
K.set_learning_phase(1)

# dataset setting
width, height = 256, 256
coco_img_path = '../../dataset/coco/images/train2014/'
sl_img_path = './images/style/'

# a trade-off coefficient between content loss and style loss, which is multiplied with style loss
alpha = 1

# create placeholders for input images
if K.image_data_format() == 'channels_last':
    content_img_shape = [width, height, 3]
    style_img_shape = [width, height, 3]
else:
    content_img_shape = [3, width, height]
    style_img_shape = [3, width, height]
with tf.name_scope('input'):
    content_img = tf.placeholder(dtype='float32',
                                 shape=(None, content_img_shape[0], content_img_shape[1], content_img_shape[2]),
                                 name='content_img')
    style_img = tf.placeholder(dtype='float32',
                               shape=(None, style_img_shape[0], style_img_shape[1], style_img_shape[2]),
                               name='style_img')

# load model
main_model, outputs = gen_model_v2(input_content_tensor=content_img, input_style_tensor=style_img)
concact_input = K.concatenate([content_img,
                               outputs,
                               style_img], axis=0)
vgg16_model = VGG16_fixed(input_tensor=concact_input,
                          weights='imagenet', include_top=False)

# get the symbolic outputs of each "key" layer (we gave them unique names).
vgg16_outputs_dict = dict([(layer.name, layer.output) for layer in vgg16_model.layers])

# get relevant layers
content_feature_layers = 'block3_conv3'
style_feature_layers = ['block1_conv2', 'block2_conv2',
                        'block3_conv3', 'block4_conv3']

# content loss
ct_loss = K.variable(0.)
layer_features = vgg16_outputs_dict[content_feature_layers]
content_img_features = layer_features[0, :, :, :]
outputs_img_features = layer_features[1, :, :, :]
ct_loss += content_loss(content_img_features, outputs_img_features)

# style loss
sl_loss_temp = K.variable(0.)
for layer_name in style_feature_layers:
    layer_features = vgg16_outputs_dict[layer_name]
    outputs_img_features = layer_features[1, :, :, :]
    style_img_features = layer_features[2, :, :, :]
    sl = style_loss(style_img_features, outputs_img_features)
    sl_loss_temp += (alpha / len(style_feature_layers)) * sl
sl_loss = sl_loss_temp

# combine loss
loss = ct_loss + sl_loss

# write in summary
tf.summary.scalar('content_loss', ct_loss)
tf.summary.scalar("style_loss", sl_loss)
tf.summary.scalar("loss", loss)

# optimization
train_op = tf.train.AdamOptimizer(learning_rate=0.001,
                                  beta1=0.9,
                                  beta2=0.999,
                                  epsilon=1e-08).minimize(loss)

with tf.Session(config=config) as sess:
    # Merge all the summaries and write them out to /tmp/mnist_logs (by default)
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter('./logs/gen_model_v2',
                                         sess.graph)

    # initialize all variables
    tf.global_variables_initializer().run()

    # get training image
    ct_img_name = [x for x in os.listdir(coco_img_path) if x.endswith(".jpg")]
    ct_img_num = len(ct_img_name)
    print("content image number: ", ct_img_num)
    sl_img_name = [x for x in os.listdir(sl_img_path) if x.endswith(".jpg")]
    sl_img_num = len(sl_img_name)
    print("style image number: ", sl_img_num)

    # start training
    start_time = time.time()
    for i in range(1):
        itr = 0
        for ct_name in ct_img_name:
            if itr > 10:  # used to train on a small sample of ms coco
                break
            sl_name = sl_img_name[itr % sl_img_num]
            _, loss_val, summary = sess.run([train_op, loss, merged],
                                            feed_dict={content_img: preprocess_image(coco_img_path + ct_name, height, width),
                                                       style_img: preprocess_image(sl_img_path + sl_name, height, width)})
            train_writer.add_summary(summary, itr * (i+1))
            print('iteration', itr, 'loss =', loss_val)
            itr += 1
    end_time = time.time()
    print('Training completed in %ds' % (end_time - start_time))

    # save model
    main_model.save('./models/gen_model_v2_1.h5')

    # use images to test
    test_ct_img_path = './images/content/train-1.jpg'
    test_ct_img = preprocess_image(test_ct_img_path, height, width)
    test_sl_img_path = './images/style/starry_night.jpg'
    test_sl_img = preprocess_image(test_sl_img_path, height, width)

    # feed test images into model
    output = sess.run(outputs, feed_dict={content_img: test_ct_img, style_img: test_sl_img})
    output = deprocess_image(output)
    print('Output image shape:', output.shape[1:4])
    imsave('./images/autoencoder/test_v2_1.png', output[0])
and my loss function is defined as below:
#!/usr/bin/python
# -*- coding:utf8 -*-
import numpy as np
from keras import backend as K
import tensorflow as tf

# the gram matrix of an image tensor (feature-wise outer product)
def gram_matrix(x):
    assert K.ndim(x) == 3
    if K.image_data_format() == 'channels_first':
        features = K.batch_flatten(x)
    else:
        features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    gram = K.dot(features, K.transpose(features))
    return gram

def style_loss(featuremap_1, featuremap_2):
    assert K.ndim(featuremap_1) == 3
    assert K.ndim(featuremap_2) == 3
    g1 = gram_matrix(featuremap_1)
    g2 = gram_matrix(featuremap_2)
    channels = 3
    if K.image_data_format() == 'channels_first':
        size = featuremap_1.shape[1] * featuremap_1.shape[2]
    else:
        size = K.shape(featuremap_1)[0] * K.shape(featuremap_1)[1]
    size = K.cast(size, tf.float32)
    return K.sum(K.square(g1 - g2)) / (4. * (channels ** 2) * (size ** 2))

def content_loss(base, combination):
    return K.sum(K.square(combination - base))
So, as you can see, my loss values are sums of squares computed with K.square(). How can the total be negative?
When I run it, the loss decreases sharply and goes below zero, which seems impossible.
You're starting with ct_loss as a K.variable(0.) and accumulating onto it (and likewise for sl_loss_temp). Those variables are trainable, so the Adam optimizer can minimize the total loss simply by driving them without bound toward minus infinity, which is why the loss drops sharply and goes negative. Just set ct_loss to the content loss directly:
ct_loss = content_loss(content_img_features, outputs_img_features)
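The same applies to sl_loss_temp. A sketch of the loss section with both losses built as plain tensors, reusing the names from your script (untested against your models, so treat it as a starting point):

layer_features = vgg16_outputs_dict[content_feature_layers]
content_img_features = layer_features[0, :, :, :]
outputs_img_features = layer_features[1, :, :, :]
ct_loss = content_loss(content_img_features, outputs_img_features)

sl_loss = 0.
for layer_name in style_feature_layers:
    layer_features = vgg16_outputs_dict[layer_name]
    outputs_img_features = layer_features[1, :, :, :]
    style_img_features = layer_features[2, :, :, :]
    # accumulate onto a plain tensor, not a trainable K.variable
    sl_loss += (alpha / len(style_feature_layers)) * style_loss(style_img_features, outputs_img_features)

loss = ct_loss + sl_loss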