I've recently been learning deep reinforcement learning and wanted to apply it to a problem from gym using Keras.
During training I realized it was far too slow; after checking why, I found that the "fit" call takes most of the time.
Running each episode takes 3-4 minutes.
Is there something wrong with what I'm doing? Or can you suggest an improvement?
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.optimizers import Adam
from collections import deque
import random
import gym
import datetime
class DQN():
def __init__(self, env):
self.env = env
self.memory = deque(maxlen=2000)
self.gamma = 0.98
self.epsilon = 1
self.epsilon_min = 0.01
self.epsilon_decay = 0.998
self.learning_rate = 0.001
self.model = self.create_model()
self.target_model = self.create_model()
def create_model(self):
model = keras.Sequential()
state_shape = self.env.observation_space.shape
model.add(keras.layers.Dense(48, activation="relu", input_dim=state_shape[0]))
model.add(keras.layers.Dense(24, activation="relu"))
model.add(keras.layers.Dense(self.env.action_space.n, activation="relu"))
model.compile(loss="mse", optimizer=Adam(lr=self.learning_rate))
return model
def remember(self, state, action, reward, new_state, done):
self.memory.append([state, action, reward, new_state, done])
def replay(self):
batch_size = 32
if len(self.memory) < batch_size:
return
samples = random.sample(self.memory, batch_size)
# states, actions, rewards, states_, dones = samples
# targets = self.target_model.predict(states)
# _states = [i for i in range(len(samples))]
# targets = [[0 for j in range(self.env.action_space.n)] for i in range(len(samples))]
_states = np.zeros((len(samples), 8))
targets = np.zeros((len(samples), self.env.action_space.n))
for i, sample in enumerate(samples):
state, action, reward, new_state, done = sample
_states[i] = state
# target = self.target_model.predict(state)
if done:
targets[i][action] = reward
else:
Q_future = max(self.target_model.predict(new_state)[0])
targets[i][action] = reward + Q_future*self.gamma
self.model.fit(_states, targets, epochs=1, verbose=0)
# for sample in samples:
# state, action, reward, new_state, done = sample
# target = self.target_model.predict(state)
# if done:
# target[0][action] = reward
# else:
# Q_future = max(self.target_model.predict(new_state)[0])
# target[0][action] = reward + Q_future*self.gamma
# start_time = datetime.datetime.now()
# self.model.fit(state, target, epochs=1, verbose=0)
# end_time = datetime.datetime.now()
# print("--fit--")
# print(end_time-start_time)
def target_train(self):
weights = self.model.get_weights()
target_weights = self.target_model.get_weights()
for i in range(len(target_weights)):
target_weights[i] = weights[i]
self.target_model.set_weights(target_weights)
def act(self, state):
self.epsilon *= self.epsilon_decay
self.epsilon = max(self.epsilon_min, self.epsilon)
if np.random.random() < self.epsilon:
return self.env.action_space.sample()
return np.argmax(self.model.predict(state)[0])
def save_model(self, fn):
self.model.save(fn)
def act_eval(self, state):
return np.argmax(self.model.predict(state)[0])
def evaluation(self, n_eval=10):
total_reward = 0
for _ in range(n_eval):
self.env.reset()
cur_state = self.env.reset().reshape(1,8)
done = False
while not done:
action = self.act_eval(cur_state)
new_state, reward, done, _ = self.env.step(action)
total_reward += reward
cur_state = new_state.reshape(1,8)
return total_reward / n_eval
def main():
save_path = "policies/"
env = gym.make("LunarLander-v2")
trials = 2000
trial_len = 500
update_target_network = 500
agent = DQN(env=env)
for trial in range(trials):
cur_state = env.reset().reshape(1,8)
time_step_cntr = 0
# check execution durations
dur_replay = 0
dur_step = 0
dur_act = 0
for step in range(trial_len):
print("Trial {0}, step {1}".format(trial, step))
action = agent.act(cur_state) #
new_state, reward, done, _ = env.step(action) #
new_state = new_state.reshape(1,8)
agent.remember(cur_state, action, reward, new_state, done)
# learn from experience
agent.replay() #
# after "update_target_network" steps, update target network
if time_step_cntr % update_target_network == 0:
agent.target_train()
time_step_cntr += 1
cur_state = new_state
if done:
break
# print("Duration replay {0}, duration act {1}, duration step {2}".format(dur_replay, dur_act, dur_step))
# at each N steps, evaluate
print("Evaluation over 10 episodes", agent.evaluation())
print("Trial #{0} completed.".format(trial))
# # print the progress
# if trial % 100 == 0:
# print("Trial #{0} completed.".format(trial))
# save the model
# if trial % 20 == 0:
agent.save_model(save_path + str(trial) + "__.model")
agent.save_model(save_path + "_final" + "__.model")
if __name__ == "__main__":
main()
Your problem is not the fit call but the loop you have in the replay() method. In cases like this, always try to replace Python loops with numpy operations; that makes things much faster.
Replace your replay method with the following one and let me know if it runs faster for you:
def replay(self):
batch_size = 32
if len(self.memory) >= batch_size:
# Draw a sample
samples = random.sample(self.memory, batch_size)
# Prepare the batch
state, action, reward, new_state, done = zip(*samples)
next_state = np.concatenate(new_state)
done = np.array(done)[:,None]
state = np.concatenate(state)
reward = np.array(reward)[:,None]
q_future = self.target_model.predict(next_state)
targets = reward + self.gamma*np.max(q_future, axis=1, keepdims=True)*(1-done)
# Fit the model
self.model.fit(state, targets, epochs=1, verbose=0)
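One thing to keep in mind: targets above has shape (batch_size, 1) while the network outputs one Q-value per action, so the MSE loss will pull every action towards the same target. If you want to keep the original per-action update while staying vectorized, a sketch along these lines should work (same attribute names as in the class above, still only two predict calls per batch):
def replay(self):
    batch_size = 32
    if len(self.memory) < batch_size:
        return
    samples = random.sample(self.memory, batch_size)
    state, action, reward, new_state, done = zip(*samples)
    state = np.concatenate(state)          # (batch, n_features)
    new_state = np.concatenate(new_state)  # (batch, n_features)
    action = np.array(action)
    reward = np.array(reward)
    done = np.array(done, dtype=np.float32)
    # start from the current predictions so untouched actions keep their Q-values
    targets = self.model.predict(state)
    q_future = np.max(self.target_model.predict(new_state), axis=1)
    targets[np.arange(batch_size), action] = reward + self.gamma * q_future * (1.0 - done)
    self.model.fit(state, targets, epochs=1, verbose=0)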
I am using this code, modified to work with a car (0 = Left, 1 = Straight, 2 = Right).
I would like to add some observations, such as destination (X, Y), car location (X, Y), bearing (angle), distance_to_destination and bearing_of_destination, in the hope that the car can find its way to the destination.
I have spent most of the day trying to get this to work and failed in many different ways. The crux of the problem seems to be getting the input shape to match.
I think the closest I have got is this:
def __init__(self):
low = np.array([-5, -5, -5, -5, -5])
high = -np.array([ 5, 5, 5, 5, 5])
self.observation_space = gym.spaces.Box(low, high, dtype=np.float32)
self.action_space = gym.spaces.Box(low, high, dtype=np.float32)
def reset(self):
self.state = Myarray  # [[1,2], [1,2], [1,2], [1,2], [1,2]]  # result.BearingToDest
self.shower_length = 60000
return self.state
def build_model(states, actions):
model = Sequential()
model.add(Dense(units=24, activation='relu', input_shape=[2]))
model.add(Dense(units=24, activation='relu'))
model.add(Dense(actions, activation='linear'))
return model
When I run it and the model loads, the error message is:
ValueError: Error when checking input: expected dense_input to have 2 dimensions, but got array with shape (1, 1, 1, 2)
Training for 1000000 steps ...
Resetting ML
Interval 1 (0 steps performed)
First, just to make sure, the state you return in reset should look something like:
self.state = np.array([x1, y1, x2, y2, angle])
I don't see the step function in your code, but I assume you also modified it to return self.state?
Also, your action space is the same as the observation space, which doesn't seem right. Given what you said, there are 3 actions, so it should be:
self.action_space = Discrete(3)
Without the full code, it is not really possible to find the cause of your problem. Could you show it?
I also noticed a minus sign which I find strange (although it seems unrelated to your main problem):
high = -np.array([ 5, 5, 5, 5, 5])
^
HERE
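For completeness, here is a minimal sketch of how the spaces and reset could look with five observations and three discrete actions (the bounds and values below are placeholders; adapt them to your car simulation):
import numpy as np
import gym
from gym.spaces import Box, Discrete

class CarEnv(gym.Env):
    def __init__(self):
        # five observations: car x/y, destination x/y, bearing (degrees)
        low = np.array([-600.0, -600.0, -600.0, -600.0, -360.0], dtype=np.float32)
        high = np.array([600.0, 600.0, 600.0, 600.0, 360.0], dtype=np.float32)
        self.observation_space = Box(low=low, high=high, dtype=np.float32)
        # three actions: 0 = left, 1 = straight, 2 = right
        self.action_space = Discrete(3)
        self.state = np.zeros(5, dtype=np.float32)

    def reset(self):
        # fill these in from your simulator; zeros are placeholders
        self.state = np.zeros(5, dtype=np.float32)
        return self.state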
The code that ran for me is below. It will take some tweaks to get everything learning the way I want, but at least it's running :)
import gym
from gym import Env
import numpy as np
from gym.spaces import Discrete, Box, Dict
import random
Myarray = np.Myarray = [[3, 2]]
# Myarray = [[1][2 ]]
x1 = y1 = x2 = y2 = angle = 1
# create a custom class
class ShowerEnv(Env):
def __init__(self, size=5):
self.size = size
high = np.array([[600, 600, 600, 600, 360]])
low = np.array([[-1, -1, -1, -1, -360]])
self.state = np.zeros((1, 5), dtype=np.float32)
self.x1, self.y1, self.x2, self.y2, self.angle = 1, 1, 1, 1, 1
self.action_space = Discrete(3)
self.observation_space = gym.spaces.Box(low, high, dtype=np.float32, shape=(1, 5))
self.shower_length = 60 # duration of temperature
def step(self, shower_action):
x1 = y1 = x2 = y2 = angle = 1
self.shower_length -= 1
# this line sends a protobuf command to the car program and gets a response, i.e. the true environment
result = client.ChangeCoarse( shower_action - 1, True)#( shower_action - 1, True)
self.state = np.array([result.X, result.Y, result.DesX, result.DesY, result.BearingToDest])
if (result.ResetML == True):
self.reset()
# x1 = y1 = x2 = y2 = angle = 1
# self.state = np.array([x1, y1, x2, y2, angle])
# this should set the reward and gets it from protobuf
reward = result.BearingToDest
# reward = 1 # just put in to make the code run
if self.shower_length <= 0:
done = True
else:
done = False
info = ()
info = {}
return self.state, reward, done, info
def render(self):
pass
def reset(self):
result = client.ChangeResetDest()
self.shower_length = 60000
self.state = np.array([result.X, result.Y, result.DesX, result.DesY, result.BearingToDest])
print("Resetting ML")
return self.state
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
env = ShowerEnv()
states = env.observation_space.shape
actions = env.action_space.n
def build_model(states, actions):
model = Sequential()
model.add(Dense(units=24, activation='relu', input_shape=states))
model.add(Dense(units=24, activation='relu'))
model.add(Dense(actions, activation='linear'))
model.add(Flatten())
return model
# model =build_model(states,actions)
# model.compile(optimizer=Adam(learning_rate=1e-3), metrics=['mae'])
# del model
#print(model.summary())
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
import grpc
import Message_pb2_grpc as pb2_grpc, Message_pb2 as pb2
class UnaryClient(object):
"""
Client for gRPC functionality
"""
def __init__(self):
self.host = 'localhost'
self.server_port = 50052
# instantiate a channel
self.channel = grpc.insecure_channel(
'{}:{}'.format(self.host, self.server_port))
# bind the client and the server
self.stub = pb2_grpc.UnaryStub(self.channel)
def ChangeCoarse(self, val, TF):
"""
Client function to call the rpc for GetServerResponse
"""
message = pb2.MessageTo(MoveBoat=True, MoveBoatStep=TF, BoatDelta=val)
# (message="message", Val=9, MoveBoat=True,MoveBoatStep=True, SailAngle=4, BoatAngle=5.79878, SailDelta=0, BoatDelta=-1)
# print(f'{message}')
return self.stub.GetServerResponse(message)
def ChangeSail(self, val, TF):
"""
Client function to call the rpc for GetServerResponse
"""
message = pb2.MessageTo(MoveBoat=True, MoveBoatStep=TF, SailDelta=val)
# print(f'{message}')
return self.stub.GetServerResponse(message)
def ChangeWindDirection(self, val, TF):
"""
Client function to call the rpc for GetServerResponse
"""
message = pb2.MessageTo(MoveBoat=True, MoveBoatStep=TF, WindDelta=val)
# print(f'{message}')
return self.stub.GetServerResponse(message)
def ChangeResetDest(self):
"""
Client function to call the rpc for GetServerResponse
"""
message = pb2.MessageTo(MoveBoat=True, ResetTarget=True)
# (message="message", Val=9, MoveBoat=True,MoveBoatStep=True, SailAngle=4, BoatAngle=5.79878, SailDelta=0, BoatDelta=-1)
# print(f'{message}')
return self.stub.GetServerResponse(message)
client = UnaryClient()
result = client.ChangeCoarse(90, True)
# if result.
def build_agent(model, actions):
policy = BoltzmannQPolicy()
memory = SequentialMemory(limit=900000, window_length=1)
dqn = DQNAgent(model=model, memory=memory, policy=policy, nb_actions=actions, nb_steps_warmup=100,
target_model_update=1e-2)
return dqn
dqn = build_agent(build_model(states, actions), actions)
dqn.compile(optimizer=Adam(learning_rate=1e-5), metrics=['mae'])
dqn.fit(env, nb_steps=1000000, visualize=False, verbose=1)
I am running the following code once on CPU (without the "disable_eager_execution" part) and once more on GPU.
On CPU, one epoch takes 12 hours, but the loss changes from batch to batch and I can see that training takes place.
On the GPU version, nothing happens: training one epoch takes around 1 hour, but the loss and accuracy stay the same.
Please help me understand what I am doing wrong.
I am running this code on AWS SageMaker (ml.g4dn.4xlarge).
Code:
import numpy as np
import pandas as pd
import os
import datetime
import tensorflow as tf
import re
from sklearn.metrics.pairwise import cosine_similarity
from tensorflow.python.framework.ops import disable_eager_execution
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')), '\n')
disable_eager_execution()
# Read data
# read dictionaries:
# company_dict:
company_df = pd.read_csv("/home/ec2-user/SageMaker/company_similarity/data/company_dict.csv", sep='\t', header=None)
company_df.columns = ['company_id', 'idx']
# payee dict
payee_df = pd.read_csv("/home/ec2-user/SageMaker/company_similarity/data/cleaned_up_payee_dict.csv", sep='\t', header=None)
payee_df.columns = ['payee', 'idx']
# Read raw data
BATCH_SIZE = 32
raw_data = tf.data.experimental.make_csv_dataset(
"/home/ec2-user/SageMaker/company_similarity/data/training_data.csv",
column_names=['company_id', 'payee', 'label'],
select_columns=['company_id', 'payee', 'label'],
field_delim='\t',
column_defaults=[tf.string, tf.string, tf.int32],
batch_size=BATCH_SIZE,
label_name='label',
na_value="?",
num_epochs=1,
ignore_errors=True,
)
class PreprocessingFeatures(object):
def __init__(self, company_idx, payee_idx):
self.payee_idx = payee_idx
self.company_idx = company_idx
self.symbols = '!"$%&\'\?()*+,-./:;<=>?[\\]^_`{|}~a-zA-Z0-9 '
self.payee_lookup = tf.keras.layers.experimental.preprocessing.StringLookup(vocabulary=payee_idx,
mask_token=None,
num_oov_indices=1
)
self.company_lookup = tf.keras.layers.experimental.preprocessing.StringLookup(vocabulary=company_idx,
mask_token=None,
num_oov_indices=1
)
def __call__(self, features, labels):
payee = self.payee_lookup(features['payee'])
company = self.company_lookup(features['company_id'])
return (company, payee), labels
payee_list = list(payee_df['payee'])
company_list = [str(si) for si in list(company_df['company_id'])]
# ************ START TRAINING ************ #
log_dir = '/home/ec2-user/SageMaker/company_similarity/models/logs/fit/' + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
checkpoint_dir = '/home/ec2-user/SageMaker/company_similarity/models/embedding_checkpoints'
file_writer = tf.summary.create_file_writer(log_dir + "/metrics")
file_writer.set_as_default()
def present_topK(model, listA, item_title='eBay', topK=10):
'''
show the topK most similar items to item_title using the model's embedding
:param model: the trained model
:param listA: list of payee names, in vocabulary order
:param item_title: item to look up
:param topK: number of similar items to return
:return: table to print (string)
'''
assert item_title in listA, "Item not in Vocabulary"
emb = model.layers[2].get_weights()[0]
# we started from 1 not zero on dictionary
score = cosine_similarity(emb[listA.index(item_title)+1].reshape(1, -1), emb)[0]
similar_items = np.argsort(score)[::-1][:topK]
res = {'payee': [], 'score': []}
for i in similar_items:
res['payee'] += [listA[i-1]]
res['score'] += [score[i]]
return "\n".join("{}\t{}".format(k, v) for k, v in res.items())
class GenerateExamplesCallback(tf.keras.callbacks.Callback):
def __init__(self):
self.step = 0
def on_epoch_end(self, epoch, logs=None):
self.step += 1
self.model.save('/home/ec2-user/SageMaker/company_similarity/models/embedding_checkpoints/model_{}'.format(epoch))
sim_table = present_topK(self.model, payee_list)
print("\nSimilar Items to 'eBay': ", sim_table)
with file_writer.as_default():
tf.summary.text('Similarity sanity check', data=tf.convert_to_tensor(sim_table), step=epoch)
def on_batch_end(self, batch, logs=None):
if batch % 1000 == 0:
sim_table = present_topK(self.model, payee_list)
print("\nSimilar Items to 'eBay': ", sim_table)
with file_writer.as_default():
tf.summary.text('Similarity sanity check', data=tf.convert_to_tensor(sim_table), step=batch)
print('TensorBoard logging folder: ', log_dir)
print("checkpoint_dir:", checkpoint_dir)
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_prefix, save_weights_only=True, save_freq=200000)
early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=2)
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1, update_freq=10000)
callbacks = [checkpoint_callback, tensorboard_callback, GenerateExamplesCallback()]
# read the data
# train_data = raw_data.map(PreprocessingFeatures(company_list, payee_list)).shuffle(buffer_size=10000).repeat()
train_data = raw_data.map(PreprocessingFeatures(company_list, payee_list)).repeat()
# examples
# next(iter(raw_data.take(1)))
# next(iter(train_data))
# wc -l <filename> on terminal
fileLen = 5851184
STEPS_PER_EPOCH = (fileLen // BATCH_SIZE) + 1
# STEPS_PER_EPOCH = 1000
def build_model(company_embedding=128, payee_embedding=128, loss=tf.keras.losses.binary_crossentropy):
company_input = tf.keras.layers.Input(name='company_input', shape=(1,))
payee_input = tf.keras.layers.Input(name='payee_input', shape=(1,))
company_emb = tf.keras.layers.Embedding(name='company_embedding',
input_dim=len(company_list)+1,
output_dim=company_embedding)(company_input)
company_emb = tf.keras.layers.Flatten()(company_emb)
payee_emb = tf.keras.layers.Embedding(name='payee_embedding',
input_dim=len(payee_list)+1,
output_dim=payee_embedding)(payee_input)
payee_emb = tf.keras.layers.Flatten()(payee_emb)
merged = tf.keras.layers.Dot(name='dot', normalize=True, axes=1)([payee_emb, company_emb])
merged = tf.keras.layers.Reshape(target_shape = [1])(merged)
x = tf.keras.layers.Dense(1, activation='sigmoid')(merged)
# x = tf.keras.layers.Concatenate()([item_emb, device_emb])
# x = tf.keras.layers.Dense(128, activation='relu')(x)
# x = tf.keras.layers.Dropout(0.5)(x)
# x = tf.keras.layers.Dense(64, activation='relu')(x)
# x = tf.keras.layers.Dropout(0.5)(x)
# x = tf.keras.layers.Dense(32, activation='relu')(x)
# # x = tf.keras.layers.BatchNormalization()(x)
# x = tf.keras.layers.Dense(1, activation='sigmoid')(x)
model = tf.keras.Model(inputs=(company_input, payee_input), outputs=x)
model.compile(
loss=loss,
optimizer='adam',
metrics=['accuracy'],
)
return model
model = build_model()
EPOCHS = 5
r = model.fit(train_data,
epochs=EPOCHS,
steps_per_epoch=STEPS_PER_EPOCH,
callbacks=callbacks
)
model.save("/home/ec2user/SageMaker/company_similarity/models/models/embedding_model_final.h5")
print("Training is completed")
I am training a reinforcement learning model using the CartPole environment from OpenAI gym. Despite .h5 files for my weights and model appearing in the target directory, I get None after running the following code: tf.train.get_checkpoint_state("C:/Users/dgt/Documents").
Here is my entire code -
## Slightly modified from the following repository - https://github.com/gsurma/cartpole
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import random
import gym
import numpy as np
import tensorflow as tf
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
ENV_NAME = "CartPole-v1"
GAMMA = 0.95
LEARNING_RATE = 0.001
MEMORY_SIZE = 1000000
BATCH_SIZE = 20
EXPLORATION_MAX = 1.0
EXPLORATION_MIN = 0.01
EXPLORATION_DECAY = 0.995
checkpoint_path = "training_1/cp.ckpt"
class DQNSolver:
def __init__(self, observation_space, action_space):
# save_dir = args.save_dir
# self.save_dir = save_dir
# if not os.path.exists(save_dir):
# os.makedirs(save_dir)
self.exploration_rate = EXPLORATION_MAX
self.action_space = action_space
self.memory = deque(maxlen=MEMORY_SIZE)
self.model = Sequential()
self.model.add(Dense(24, input_shape=(observation_space,), activation="relu"))
self.model.add(Dense(24, activation="relu"))
self.model.add(Dense(self.action_space, activation="linear"))
self.model.compile(loss="mse", optimizer=Adam(lr=LEARNING_RATE))
def remember(self, state, action, reward, next_state, done):
self.memory.append((state, action, reward, next_state, done))
def act(self, state):
if np.random.rand() < self.exploration_rate:
return random.randrange(self.action_space)
q_values = self.model.predict(state)
return np.argmax(q_values[0])
def experience_replay(self):
if len(self.memory) < BATCH_SIZE:
return
batch = random.sample(self.memory, BATCH_SIZE)
for state, action, reward, state_next, terminal in batch:
q_update = reward
if not terminal:
q_update = (reward + GAMMA * np.amax(self.model.predict(state_next)[0]))
q_values = self.model.predict(state)
q_values[0][action] = q_update
self.model.fit(state, q_values, verbose=0)
self.exploration_rate *= EXPLORATION_DECAY
self.exploration_rate = max(EXPLORATION_MIN, self.exploration_rate)
def cartpole():
env = gym.make(ENV_NAME)
#score_logger = ScoreLogger(ENV_NAME)
observation_space = env.observation_space.shape[0]
action_space = env.action_space.n
dqn_solver = DQNSolver(observation_space, action_space)
checkpoint = tf.train.get_checkpoint_state("C:/Users/dgt/Documents")
print('checkpoint:', checkpoint)
if checkpoint and checkpoint.model_checkpoint_path:
dqn_solver.model = keras.models.load_model('cartpole.h5')
dqn_solver.model = model.load_weights('cartpole_weights.h5')
run = 0
i = 0
while i<2:
i = i + 1
#total = 0
run += 1
state = env.reset()
state = np.reshape(state, [1, observation_space])
step = 0
while True:
step += 1
#env.render()
action = dqn_solver.act(state)
state_next, reward, terminal, info = env.step(action)
#total += reward
reward = reward if not terminal else -reward
state_next = np.reshape(state_next, [1, observation_space])
dqn_solver.remember(state, action, reward, state_next, terminal)
state = state_next
dqn_solver.model.save('cartpole.h5')
dqn_solver.model.save_weights('cartpole_weights.h5')
if terminal:
print("Run: " + str(run) + ", exploration: " + str(dqn_solver.exploration_rate) + ", score: " + str(step))
#score_logger.add_score(step, run)
break
dqn_solver.experience_replay()
if __name__ == "__main__":
cartpole()
Both the cartpole_weights.h5 and cartpole.h5 files appear in my target directory. However, I believe another file called 'checkpoint' should also appear, and my understanding is that its absence is why my code does not run.
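For reference, tf.train.get_checkpoint_state only recognizes TensorFlow-format checkpoints, which come with a 'checkpoint' index file; plain Keras .h5 files are not indexed that way. A minimal sketch of producing such a checkpoint (the directory name is just an example):
import tensorflow as tf

# assuming dqn_solver.model is the compiled Keras model from the code above
ckpt = tf.train.Checkpoint(model=dqn_solver.model)
manager = tf.train.CheckpointManager(ckpt, directory="training_1", max_to_keep=3)
manager.save()  # writes the checkpoint data files plus the 'checkpoint' index file

state = tf.train.get_checkpoint_state("training_1")
print(state.model_checkpoint_path if state else None)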
First, the code won't run if you don't already have the weights/model saved. So I commented out the below lines and ran the script to generate the files for the first time.
checkpoint = tf.train.get_checkpoint_state(".")
print('checkpoint:', checkpoint)
if checkpoint and checkpoint.model_checkpoint_path:
dqn_solver.model = tf.keras.models.load_model('cartpole.h5')
dqn_solver.model.load_weights('cartpole_weights.h5')
Note I also modified the above code - there were some syntax errors before. In particular, this line in your post
dqn_solver.model = model.load_weights('cartpole_weights.h5')
is probably what was causing the problem, because the model.load_weights('file') method mutates model (as opposed to returning the model).
I then tested that the model weights were being saved/loaded correctly. To do this, you can do
dqn_solver = DQNSolver(observation_space, action_space)
dqn_solver.model.trainable_variables
To see the (randomly initialized) weights for when the model first gets made. Then you can load the weights with either
dqn_solver.model = tf.keras.models.load_model('cartpole.h5')
or
dqn_solver.model.load_weights('cartpole_weights.h5')
and then you can view trainable_variables again to make sure they are different from the initial weights, and that both loading methods give equivalent values.
When you save a model, it saves the full architecture - the exact configuration of layers. When you save the weights, it just saves the list of tensors you can see with trainable_variables.
Note that when you load_weights, the weights need to be loaded into the exact architecture they were saved from, otherwise it won't work correctly. So if you changed the model architecture in DQNSolver and then tried to load_weights saved for the old model, it's not going to work right. If you load_model, it will restore the model to exactly the saved architecture and also set the weights.
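To make the distinction concrete, a small sketch of the two paths described above (file names are placeholders):
# Full model: architecture + weights + optimizer state in one file.
dqn_solver.model.save('cartpole.h5')
restored_model = tf.keras.models.load_model('cartpole.h5')

# Weights only: must be loaded into an identical architecture.
dqn_solver.model.save_weights('cartpole_weights.h5')
fresh_solver = DQNSolver(observation_space, action_space)  # rebuilds the same layers
fresh_solver.model.load_weights('cartpole_weights.h5')     # mutates the model in place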
edit - entire modified script
## Slightly modified from the following repository - https://github.com/gsurma/cartpole
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import random
import gym
import numpy as np
import tensorflow as tf
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
ENV_NAME = "CartPole-v1"
GAMMA = 0.95
LEARNING_RATE = 0.001
MEMORY_SIZE = 1000000
BATCH_SIZE = 20
EXPLORATION_MAX = 1.0
EXPLORATION_MIN = 0.01
EXPLORATION_DECAY = 0.995
checkpoint_path = "training_1/cp.ckpt"
class DQNSolver:
def __init__(self, observation_space, action_space):
# save_dir = args.save_dir
# self.save_dir = save_dir
# if not os.path.exists(save_dir):
# os.makedirs(save_dir)
self.exploration_rate = EXPLORATION_MAX
self.action_space = action_space
self.memory = deque(maxlen=MEMORY_SIZE)
self.model = Sequential()
self.model.add(Dense(24, input_shape=(observation_space,), activation="relu"))
self.model.add(Dense(24, activation="relu"))
self.model.add(Dense(self.action_space, activation="linear"))
self.model.compile(loss="mse", optimizer=Adam(lr=LEARNING_RATE))
def remember(self, state, action, reward, next_state, done):
self.memory.append((state, action, reward, next_state, done))
def act(self, state):
if np.random.rand() < self.exploration_rate:
return random.randrange(self.action_space)
q_values = self.model.predict(state)
return np.argmax(q_values[0])
def experience_replay(self):
if len(self.memory) < BATCH_SIZE:
return
batch = random.sample(self.memory, BATCH_SIZE)
for state, action, reward, state_next, terminal in batch:
q_update = reward
if not terminal:
q_update = (reward + GAMMA * np.amax(self.model.predict(state_next)[0]))
q_values = self.model.predict(state)
q_values[0][action] = q_update
self.model.fit(state, q_values, verbose=0)
self.exploration_rate *= EXPLORATION_DECAY
self.exploration_rate = max(EXPLORATION_MIN, self.exploration_rate)
def cartpole():
env = gym.make(ENV_NAME)
#score_logger = ScoreLogger(ENV_NAME)
observation_space = env.observation_space.shape[0]
action_space = env.action_space.n
dqn_solver = DQNSolver(observation_space, action_space)
# checkpoint = tf.train.get_checkpoint_state(".")
# print('checkpoint:', checkpoint)
# if checkpoint and checkpoint.model_checkpoint_path:
# dqn_solver.model = tf.keras.models.load_model('cartpole.h5')
# dqn_solver.model.load_weights('cartpole_weights.h5')
run = 0
i = 0
while i<2:
i = i + 1
#total = 0
run += 1
state = env.reset()
state = np.reshape(state, [1, observation_space])
step = 0
while True:
step += 1
#env.render()
action = dqn_solver.act(state)
state_next, reward, terminal, info = env.step(action)
#total += reward
reward = reward if not terminal else -reward
state_next = np.reshape(state_next, [1, observation_space])
dqn_solver.remember(state, action, reward, state_next, terminal)
state = state_next
dqn_solver.model.save('cartpole.h5')
dqn_solver.model.save_weights('cartpole_weights.h5')
if terminal:
print("Run: " + str(run) + ", exploration: " + str(dqn_solver.exploration_rate) + ", score: " + str(step))
#score_logger.add_score(step, run)
break
dqn_solver.experience_replay()
if __name__ == "__main__":
cartpole()
#%% to load saved results
env = gym.make(ENV_NAME)
#score_logger = ScoreLogger(ENV_NAME)
observation_space = env.observation_space.shape[0]
action_space = env.action_space.n
dqn_solver = DQNSolver(observation_space, action_space)
dqn_solver.model = tf.keras.models.load_model('cartpole.h5') # or
dqn_solver.model.load_weights('cartpole_weights.h5')
I was fine-tuning BERT and ran into OOM issues. I heard that a good way to handle this is "gradient accumulation". Below is my optimization.py (which includes the gradient accumulation):
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import re
import tensorflow as tf
from tensorflow.python.training import optimizer
from tensorflow.python.framework import ops
def create_optimizer(loss, init_lr, num_train_steps, num_warmup_steps, use_tpu):
"""Creates an optimizer training op."""
global_step = tf.train.get_or_create_global_step()
learning_rate = tf.constant(value=init_lr, shape=[], dtype=tf.float32)
# Implements linear decay of the learning rate.
learning_rate = tf.train.polynomial_decay(
learning_rate,
global_step,
num_train_steps,
end_learning_rate=0.0,
power=1.0,
cycle=False)
# Implements linear warmup. I.e., if global_step < num_warmup_steps, the
# learning rate will be `global_step/num_warmup_steps * init_lr`.
if num_warmup_steps:
global_steps_int = tf.cast(global_step, tf.int32)
warmup_steps_int = tf.constant(num_warmup_steps, dtype=tf.int32)
global_steps_float = tf.cast(global_steps_int, tf.float32)
warmup_steps_float = tf.cast(warmup_steps_int, tf.float32)
warmup_percent_done = global_steps_float / warmup_steps_float
warmup_learning_rate = init_lr * warmup_percent_done
is_warmup = tf.cast(global_steps_int < warmup_steps_int, tf.float32)
learning_rate = (
(1.0 - is_warmup) * learning_rate + is_warmup * warmup_learning_rate)
# It is recommended that you use this optimizer for fine tuning, since this
# is how the model was trained (note that the Adam m/v variables are NOT
# loaded from init_checkpoint.)
optimizer = MultistepAdamWeightDecayOptimizer(
learning_rate=learning_rate,
weight_decay_rate=0.01,
beta_1=0.9,
beta_2=0.999, # 0.98 ONLY USED FOR PRETRAIN. MUST CHANGE AT FINE-TUNING 0.999,
epsilon=1e-6,
exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"])
if use_tpu:
optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)
tvars = tf.trainable_variables()
grads = tf.gradients(loss, tvars)
# This is how the model was pre-trained.
(grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)
train_op = optimizer.apply_gradients(
zip(grads, tvars), global_step=global_step)
# Normally the global step update is done inside of `apply_gradients`.
# However, `AdamWeightDecayOptimizer` doesn't do this. But if you use
# a different optimizer, you should probably take this line out.
new_global_step = global_step + 1
train_op = tf.group(train_op, [global_step.assign(new_global_step)])
return train_op
class MultistepAdamWeightDecayOptimizer(optimizer.Optimizer):
"""A basic Adam optimizer that includes "correct" L2 weight decay."""
def __init__(self,
learning_rate,
weight_decay_rate=0.0,
beta_1=0.9,
beta_2=0.999,
n = 1,
epsilon=1e-6,
exclude_from_weight_decay=None,
name="MultistepAdamWeightDecayOptimizer"):
"""Constructs a AdamWeightDecayOptimizer."""
super(MultistepAdamWeightDecayOptimizer, self).__init__(False, name)
self.learning_rate = learning_rate
self.weight_decay_rate = weight_decay_rate
self.beta_1 = beta_1
self.beta_2 = beta_2
self.epsilon = epsilon
self._n = n
self.exclude_from_weight_decay = exclude_from_weight_decay
self._n_t = None
def _prepare(self):
super(MultistepAdamWeightDecayOptimizer, self)._prepare()
self._n_t=tf.convert_to_tensor(self._n, name="n")
def _create_slots(self,var_list):
super(MultistepAdamWeightDecayOptimizer, self)._create_slots(var_list)
first_var = min(var_list, key=lambda x: x.name)
self._create_non_slot_variable(initial_value=0 if self._n == 1 else 1,
name="iter",
colocate_with=first_var)
for v in var_list:
self._zeros_slot(v,"grad_acc",self._name)
def _get_iter_variable(self):
if tf.contrib.eager.in_eager_mode():
graph = None
else:
graph = tf.get_default_graph()
return self._get_non_slot_variable("iter", graph=graph)
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
"""See base class."""
update_ops = []
var_list = [v for g, v in grads_and_vars if g is not None]
with ops.init_scope():
self._create_slots(var_list)
self._prepare()
for(grad, param) in grads_and_vars:
if grad is None or param is None:
continue
grad_acc = self.get_slot(param, "grad_acc")
param_name = self._get_variable_name(param.name)
m = tf.get_variable(name=param_name + "/adam_m", shape=param.shape.as_list(),
dtype=tf.float32,trainable=False, initializer=tf.zeros_initializer())
v = tf.get_variable(name=param_name + "/adam_v", shape=param.shape.as_list(),
dtype=tf.float32, trainable=False, initializer=tf.zeros_initializer())
def _apply_adam(grad_acc, grad, param, m, v):
total_grad = (grad_acc + grad) / tf.cast(self._n_t, grad.dtype)
next_m = (
tf.multiply(self.beta_1, m) + tf.multiply(1.0 - self.beta_1, total_grad))
next_v = (
tf.multiply(self.beta_2, v) + tf.multiply(1.0 - self.beta_2,
tf.square(total_grad)))
update = next_m / (tf.sqrt(next_v) + self.epsilon)
if self._do_use_weight_decay(param_name):
update += self.weight_decay_rate * param
update_with_lr =self.learning_rate * update
next_param = param - update_with_lr
adam_op = tf.group(param.assign(next_param), m.assign(next_m),
v.assign(next_v))
with tf.control_dependencies([adam_op]):
grad_acc_to_zero_op = grad_acc.assign(tf.zeros_like(grad_acc), use_locking=self._use_locking)
return tf.group(adam_op, grad_acc_to_zero_op)
def _accumulate_gradient(grad_acc, grad):
assign_op = tf.assign_add(grad_acc, grad, use_locking=self._use_locking)
return tf.group(assign_op)
update_op = tf.cond(tf.equal(self._get_iter_variable(),0),
lambda: _apply_adam(grad_acc, grad, param,m, v),
lambda: _accumulate_gradient(grad_acc, grad))
update_ops.append(update_op)
apply_updates = self._finish(update_ops, name_scope=name)
return apply_updates
def _finish(self, update_ops, name_scope):
iter_=self._get_iter_variable()
with tf.control_dependencies(update_ops):
with tf.colocate_with(iter_):
update_iter = iter_.assign(tf.mod(iter_+1, self._n_t),
use_locking=self._use_locking)
return tf.group(
*update_ops + [update_iter], name=name_scope)
def _do_use_weight_decay(self, param_name):
"""Whether to use L2 weight decay for `param_name`."""
if not self.weight_decay_rate:
return False
if self.exclude_from_weight_decay:
for r in self.exclude_from_weight_decay:
if re.search(r, param_name) is not None:
return False
return True
def _get_variable_name(self, param_name):
"""Get the variable name from the tensor name."""
m = re.match("^(.*):\\d+$", param_name)
if m is not None:
param_name = m.group(1)
return param_name
After I used this optimization.py, I could use a larger batch, but the loss did not decrease, and after 300 steps (I have 550000 training examples, batch size 64, 1000 iterations and 20 epochs) it said "train loop marked as finished" and stopped.
I am not sure what the problem is; could you please help me out? Thanks.
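For reference, the core idea of gradient accumulation, independent of the BERT optimizer above, is to sum the gradients of n small batches and only apply the averaged result every n-th step. A minimal TF2/Keras sketch of that pattern (the names model, optimizer, loss_fn and dataset are assumptions, not from the code above):
import tensorflow as tf

def train_with_accumulation(model, optimizer, loss_fn, dataset, accum_steps=4):
    # one accumulator per trainable variable, initialised to zero
    accum = [tf.zeros_like(v) for v in model.trainable_variables]
    for step, (x, y) in enumerate(dataset):
        with tf.GradientTape() as tape:
            loss = loss_fn(y, model(x, training=True))
        grads = tape.gradient(loss, model.trainable_variables)
        accum = [a + g for a, g in zip(accum, grads)]
        if (step + 1) % accum_steps == 0:
            # apply the averaged gradients, then reset the accumulators
            optimizer.apply_gradients(
                [(a / accum_steps, v) for a, v in zip(accum, model.trainable_variables)])
            accum = [tf.zeros_like(v) for v in model.trainable_variables]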
I tried to test the trained model.
#!/usr/bin/env python
# Copyright (c) 2016 Artsiom Sanakoyeu
from __future__ import division
from chainer import iterators
import cmd_options
import dataset
import os
import time
import regressionnet
import tensorflow as tf
import copy
from tqdm import tqdm
import numpy as np
import math
import pprint
import datetime
from regressionnet import evaluate_pcp, create_sumamry
def evaluate(net, pose_loss_op, test_iterator, summary_writer, tag='test/pose_loss'):
test_it = copy.copy(test_iterator)
total_loss = 0.0
cnt = 0
num_batches = int(math.ceil(len(test_it.dataset) / test_it.batch_size))
print len(test_it.dataset)
for batch in tqdm(test_it, total=num_batches):
feed_dict = regressionnet.fill_joint_feed_dict(net,
regressionnet.batch2feeds(batch)[:3],
conv_lr=0.0,
fc_lr=0.0,
phase='test')
global_step, loss_value = net.sess.run([net.global_iter_counter, pose_loss_op],
feed_dict=feed_dict)
total_loss += loss_value * len(batch)
cnt += len(batch)
avg_loss = total_loss / len(test_it.dataset)
print 'Step {} {} = {:.3f}'.format(global_step, tag, avg_loss)
summary_writer.add_summary(create_sumamry(tag, avg_loss),
global_step=global_step)
assert cnt == 1000, 'cnt = {}'.format(cnt)
def train_loop(net, saver, loss_op, pose_loss_op, train_op, dataset_name, train_iterator, test_iterator,
val_iterator=None,
max_iter=None,
test_step=None,
snapshot_step=None,
log_step=1,
batch_size=None,
conv_lr=None,
fc_lr=None,
fix_conv_iter=None,
output_dir='results',
):
summary_step = 50
with net.graph.as_default():
summary_writer = tf.summary.FileWriter(output_dir, net.sess.graph)
summary_op = tf.summary.merge_all()
fc_train_op = net.graph.get_operation_by_name('fc_train_op')
global_step = None
for step in xrange(max_iter + 1):
# test, snapshot
if step % test_step == 0 or step + 1 == max_iter or step == fix_conv_iter:
global_step = net.sess.run(net.global_iter_counter)
evaluate_pcp(net, pose_loss_op, test_iterator, summary_writer,
dataset_name=dataset_name,
tag_prefix='test')
if val_iterator is not None:
evaluate_pcp(net, pose_loss_op, val_iterator, summary_writer,
dataset_name=dataset_name,
tag_prefix='val')
if step % snapshot_step == 0 and step > 1:
checkpoint_prefix = os.path.join(output_dir, 'checkpoint')
assert global_step is not None
saver.save(net.sess, checkpoint_prefix, global_step=global_step)
if step == max_iter:
break
# training
start_time = time.time()
feed_dict = regressionnet.fill_joint_feed_dict(net,
regressionnet.batch2feeds(train_iterator.next())[:3],
conv_lr=conv_lr,
fc_lr=fc_lr,
phase='train')
if step < fix_conv_iter:
feed_dict['lr/conv_lr:0'] = 0.0
if step < fix_conv_iter:
cur_train_op = fc_train_op
else:
cur_train_op = train_op
if step % summary_step == 0:
global_step, summary_str, _, loss_value = net.sess.run(
[net.global_iter_counter,
summary_op,
cur_train_op,
pose_loss_op],
feed_dict=feed_dict)
summary_writer.add_summary(summary_str, global_step=global_step)
else:
global_step, _, loss_value = net.sess.run(
[net.global_iter_counter, cur_train_op, pose_loss_op],
feed_dict=feed_dict)
duration = time.time() - start_time
if step % log_step == 0 or step + 1 == max_iter:
print('Step %d: train/pose_loss = %.2f (%.3f s, %.2f im/s)'
% (global_step, loss_value, duration,
batch_size // duration))
def main(argv):
"""
Run training of the Deeppose stg-1
"""
args = cmd_options.get_arguments(argv)
if not os.path.exists(args.o_dir):
os.makedirs(args.o_dir)
suffix = datetime.datetime.now().strftime("%y%m%d_%H%M%S")
with open(os.path.join(args.o_dir, 'params.dump_{}.txt'.format(suffix)), 'w') as f:
f.write('{}\n'.format(pprint.pformat(args)))
net, loss_op, pose_loss_op, train_op = regressionnet.create_regression_net(
n_joints=args.n_joints,
init_snapshot_path=args.snapshot,
is_resume=args.resume,
reset_iter_counter=args.reset_iter_counter,
reset_moving_averages=args.reset_moving_averages,
optimizer_type=args.optimizer,
gpu_memory_fraction=0.32, # Set how much GPU memory to reserve for the network
net_type=args.net_type)
with net.graph.as_default():
saver = tf.train.Saver()
print 'args.resume: {}\nargs.snapshot: {}'.format(args.resume, args.snapshot)
bbox_extension_range = (args.bbox_extension_min, args.bbox_extension_max)
if bbox_extension_range[0] is None or bbox_extension_range[1] is None:
bbox_extension_range = None
test_bbox_extension_range = None
else:
test_bbox_extension_range = (bbox_extension_range[1], bbox_extension_range[1])
train_dataset = dataset.PoseDataset(
args.train_csv_fn, args.img_path_prefix, args.im_size,
fliplr=args.fliplr,
rotate=args.rotate,
rotate_range=args.rotate_range,
shift=args.shift,
bbox_extension_range=bbox_extension_range,
min_dim=args.min_dim,
coord_normalize=args.coord_normalize,
gcn=args.gcn,
fname_index=args.fname_index,
joint_index=args.joint_index,
symmetric_joints=args.symmetric_joints,
ignore_label=args.ignore_label,
should_downscale_images=args.should_downscale_images,
downscale_height=args.downscale_height
)
test_dataset = dataset.PoseDataset(
args.test_csv_fn, args.img_path_prefix, args.im_size,
fliplr=False, rotate=False,
shift=None,
bbox_extension_range=test_bbox_extension_range,
coord_normalize=args.coord_normalize,
gcn=args.gcn,
fname_index=args.fname_index,
joint_index=args.joint_index,
symmetric_joints=args.symmetric_joints,
ignore_label=args.ignore_label,
should_return_bbox=True,
should_downscale_images=args.should_downscale_images,
downscale_height=args.downscale_height
)
np.random.seed(args.seed)
train_iterator = iterators.MultiprocessIterator(train_dataset, args.batch_size,
n_processes=args.workers, n_prefetch=3)
test_iterator = iterators.MultiprocessIterator(
test_dataset, args.batch_size,
repeat=False, shuffle=False,
n_processes=1, n_prefetch=1)
val_iterator = None
if args.val_csv_fn is not None and args.val_csv_fn != '':
small_train_dataset = dataset.PoseDataset(
args.val_csv_fn,
args.img_path_prefix, args.im_size,
fliplr=False, rotate=False,
shift=None,
bbox_extension_range=test_bbox_extension_range,
coord_normalize=args.coord_normalize,
gcn=args.gcn,
fname_index=args.fname_index,
joint_index=args.joint_index,
symmetric_joints=args.symmetric_joints,
ignore_label=args.ignore_label,
should_return_bbox=True,
should_downscale_images=args.should_downscale_images,
downscale_height=args.downscale_height
)
val_iterator = iterators.MultiprocessIterator(
small_train_dataset, args.batch_size,
repeat=False, shuffle=False,
n_processes=1, n_prefetch=1)
train_loop(net, saver, loss_op, pose_loss_op, train_op, args.dataset_name,
train_iterator, test_iterator,
val_iterator=val_iterator,
max_iter=args.max_iter,
test_step=args.test_step,
log_step=args.log_step,
snapshot_step=args.snapshot_step,
batch_size=args.batch_size,
conv_lr=args.conv_lr,
fc_lr=args.fc_lr,
fix_conv_iter=args.fix_conv_iter,
output_dir=args.o_dir
)
if __name__ == '__main__':
import sys
main(sys.argv[1:])
This is the code I used.
I trained it for about 370000 iterations and then tried to test the trained model.
But it shows these messages:
Data loss: not an sstable (bad magic number): perhaps your file is in a different file format and you need to use a different restore operator?
Traceback (most recent call last):
DataLossError (see above for traceback): Unable to open table file out/lsp_alexnet_imagenet_small/checkpoint-370000.data-00000-of-00001: Data loss: not an sstable (bad magic number): perhaps your file is in a different file format and you need to use a different restore operator?
How can I resolve this problem?