DQN is not training well - tensorflow

import tensorflow as tf
import keras
import numpy as np
import gym
import random
from keras.layers import *
model = keras.models.Sequential()
model.add(Dense(12,activation = 'tanh',input_shape = (4,)))
model.add(Dense(2,activation = 'linear'))
model.compile(optimizer = 'adam',loss = 'MSE',metrics = ['accuracy'])
env = gym.make("CartPole-v1")
def preprocessing(state):
    return np.reshape(state, (1, 4))

replay_mem = []
replay_size = 32
reward_list = []
local_mean = list()

for episode in range(2000):
    done = False
    state = env.reset()
    count = 0
    reward_tot = 0
    model.save("cartpole-dqn.h5")
    while not done:
        count += 1
        e = (1 / (episode / 200 + 1))

        # epsilon-greedy search
        Q = model.predict(preprocessing(state))
        if e > np.random.rand(1):
            action = env.action_space.sample()
        else:
            action = np.argmax(Q)

        # take action and set reward
        state_next, reward, done, info = env.step(action)
        if done:
            reward = -100

        # replay memory
        replay_mem.append([state, action, reward, state_next, done])
        if len(replay_mem) > 2048:
            del replay_mem[0]

        state = state_next
        reward_tot += reward
        state = state_next

        Q_list = []
        state_list = []

        # set this_replay_size
        if len(replay_mem) < replay_size:
            this_replay_size = len(replay_mem)
        else:
            this_replay_size = replay_size

        # sample random batch from replay memory
        for sample in random.sample(replay_mem, this_replay_size):
            state_m, action_m, reward_m, state_next_m, done_m = sample
            if done:
                Q[0, action] = reward_m
            else:
                Q_new = model.predict(preprocessing(state_next_m))
                Q[0, action] = reward_m + 0.97 * np.max(Q_new)
            Q_list.append(Q)
            state_list.append(state_m)

        # convert to numpy array and train
        Q_list = np.array(Q_list)
        state_list = np.array(state_list)
        hist = model.fit(state_list, Q_list, epochs=5, verbose=0)
        # print("Done :", done, " Reward :", reward, " Reward_total :", reward_tot)

    local_mean.append(reward_tot)
    reward_list.append(reward_tot)
    if episode % 10 == 0:
        print("Episode :", episode + 1, " Reward_total :", reward_tot, " Reward_local_mean :", np.mean(local_mean))
        local_mean = list()

print("*******End Learning")
This is the full code of my model. I implemented the DQN algorithm.
What's wrong with my code? I've trained this model for about 1000 episodes but there is no progress.
What should I change? Should I train for more episodes, or is there something wrong with my implementation?
I've been working on this project for so long, please help me.
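For comparison, here is a minimal sketch (not taken from the post's code) of how the target for each sampled transition is usually built in experience replay, using each replayed tuple's own action, reward, and done flag; it assumes the same model, preprocessing, and replay_mem defined above and keeps the 0.97 discount from the code:

# Sketch only, assuming the model/preprocessing/replay_mem defined above.
import random
import numpy as np

def build_replay_batch(model, replay_mem, batch_size=32, gamma=0.97):
    batch = random.sample(replay_mem, min(batch_size, len(replay_mem)))
    states = np.vstack([preprocessing(s) for s, _, _, _, _ in batch])         # (B, 4)
    next_states = np.vstack([preprocessing(s2) for _, _, _, s2, _ in batch])  # (B, 4)
    targets = model.predict(states)        # start from the current Q estimates
    next_q = model.predict(next_states)
    for i, (_, a_m, r_m, _, d_m) in enumerate(batch):
        # each row's target uses that sample's own action and done flag
        targets[i, a_m] = r_m if d_m else r_m + gamma * np.max(next_q[i])
    return states, targets

# usage (sketch): states, targets = build_replay_batch(model, replay_mem)
#                 model.fit(states, targets, epochs=1, verbose=0)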

Related

Numpy loads a large file extremely slowly while TensorFlow is training

I am trying to train 10 tasks at the same time (one task per GPU).
I use a shell script to start the 10 Python processes at the same time.
When the dataset file is small, the ten tasks finish loading at almost the same time, and their training runs fine and fast.
When the dataset file is large (almost 70 GB), some tasks finish loading faster and start training while the others are still loading. Once some tasks enter the training phase, the others' loading becomes very slow, and their training also slows down.
I use numpy to load the dataset. The dataset is loaded completely up front and then fed into the model with tf.data.Dataset.
I am wondering whether the TensorFlow training process conflicts with the numpy loading in the other tasks, and how to fix it.
def load_factor_file(in_file):
    try:
        key_name = os.path.basename(in_file).split('.')[0]
        factor_arr = np.load(in_file)
        return {key_name: factor_arr}
    except Exception as e:
        logger.error(f"{str(e)}")
        return {}

def load_factor_files(in_files, store_dict):
    for f in in_files:
        fo = load_factor_file(f)
        store_dict.update(fo)
        del fo

def get_tensor_feature_label(version, txt_files, batch_size, run_mode, label_num, feature_np_dict, label_np_dict):
    def get_data_from_cache(string_message):
        feature_index, label_index, key = string_message.decode().split(',')
        feature_index = int(feature_index)
        label_index = int(label_index)
        slice_arr = feature_np_dict[key][feature_index - FACTOR_WINDOW + 1: feature_index + 1]
        label_value = label_np_dict[key][label_index]
        x_data = slice_arr.astype(np.float32)
        y_data = label_value.astype(np.float32)
        return x_data, y_data

    def parser(txt_string_in):
        feat, label = tf.numpy_function(
            get_data_from_cache, [txt_string_in], (tf.float32, tf.float32))
        feat = tf.reshape(feat, (FACTOR_WINDOW, FACTOR_NUM_MAP[version]))
        label = tf.reshape(label, (label_num,))
        return feat, label

    ds = tf.data.TextLineDataset.list_files(txt_files)
    ds = ds.apply(
        tf.data.experimental.parallel_interleave(
            tf.data.TextLineDataset,
            cycle_length=8,
            sloppy=False))
    ds = ds.prefetch(buffer_size=batch_size)
    ds = ds.cache()
    ds = ds.repeat()
    if run_mode == 'train':
        ds = ds.shuffle(buffer_size=1000)
    ds = ds.apply(
        tf.data.experimental.map_and_batch(
            map_func=parser,
            batch_size=batch_size,
            num_parallel_batches=10))
    ds = ds.prefetch(buffer_size=10)
    iterator = tf.data.make_one_shot_iterator(ds)
    features, labels = iterator.get_next()
    logger.info(f"feature size {features.shape}, label size {labels.shape}")
    return features, labels

def main(not_use_args):
    if FLAGS.use_hvd != 0:
        import horovod.tensorflow as hvd
        hvd.init()
    txt_path = f"{ARR_DATA_PATH_MAP[FLAGS.version]}/{FLAGS.stock}/{FLAGS.run_mode}/txt"
    batch_size = FLAGS.batch_size
    # model configuration
    configuration = get_config(
        txt_path, batch_size,
        FLAGS.config_name, FLAGS.use_hvd, FLAGS.stock,
        FLAGS.run_mode, FLAGS.max_per_epoch)
    configuration['fn_params']["lr"] = FLAGS.lr
    configuration['fn_params']["lr_decay_type"] = FLAGS.lr_decay_type
    configuration['fn_params']['use_hvd'] = FLAGS.use_hvd
    max_steps = FLAGS.epoch * configuration["fn_params"]["steps_per_epoch"]
    configuration["fn_params"]["max_steps"] = max_steps
    if FLAGS.warm_up_epochs > 0:
        configuration["fn_params"]["warm_up_epochs"] = FLAGS.warm_up_epochs
    configuration['model_dir'] = get_model_dir(configuration['model_dir'], configuration['name'])
    configuration["fn_params"]["optimizer"] = FLAGS.optimizer
    if FLAGS.train_scope != "None":
        configuration["fn_params"]["train_scope"] = FLAGS.train_scope
    t0_regression = create_estimator(configuration, FLAGS.threads)
    # load np data
    factor_files = glob.glob(f"{ARR_DATA_PATH_MAP[FLAGS.version]}/{FLAGS.stock}/{FLAGS.run_mode}/factor/{txt_round}-*.npy")
    label_files = glob.glob(f"{ARR_DATA_PATH_MAP[FLAGS.version]}/{FLAGS.stock}/{FLAGS.run_mode}/label/{txt_round}-*.npy")
    txt_files = glob.glob(f"{txt_path}/{txt_round}-*.txt")
    logger.info(f"Loading factors = {factor_files}, label = {label_files}")
    input_fn = functools.partial(
        get_tensor_feature_label,
        version=FLAGS.version,
        txt_files=txt_files,
        batch_size=batch_size,
        run_mode=FLAGS.run_mode,
        label_num=label_num,
        feature_np_dict=feature_np_dict,
        label_np_dict=label_np_dict)
    log_dir = configuration["model_dir"] + '/tf_logs'
    logging_hooks = get_log_hooks(
        log_dir=log_dir,
        save_steps=configuration["fn_params"]["steps_per_epoch"],
        run_mode=FLAGS.run_mode,
        test_total_num=FLAGS.test_total_num
    )
    all_hooks = logging_hooks
    t0_regression.train(
        input_fn=input_fn,
        max_steps=max_steps,
        hooks=all_hooks)

if __name__ == '__main__':
    tf.app.run()
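For reference, one way such a loader is sometimes written so that it does not pull the whole 70 GB into RAM is numpy's memory mapping; this is only a sketch and assumes the .npy files are plain, uncompressed arrays:

import os
import numpy as np

def load_factor_file_mmap(in_file):
    # Sketch only: mmap_mode keeps the array on disk and pages in
    # just the slices that are actually indexed later.
    key_name = os.path.basename(in_file).split('.')[0]
    factor_arr = np.load(in_file, mmap_mode='r')
    return {key_name: factor_arr}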

KeyError in TensorFlow in style transfer problem

def evaluate(encoder, decoder, in_lang, max_length=MAX_LENGTH):
    if use_cuda:
        in_lang = in_lang.cuda()
    input_variable = Variable(in_lang)
    input_variable = input_variable.unsqueeze(0)
    input_length = input_variable.size(1)
    encoder_hidden = encoder.initHidden()
    encoder_outputs = Variable(torch.zeros(max_length, encoder.hidden_size))
    encoder_outputs = encoder_outputs.cuda() if use_cuda else encoder_outputs
    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_variable[:, ei],
                                                 encoder_hidden)
        encoder_outputs[ei] = encoder_output[0][0]
    decoder_input = Variable(torch.LongTensor([[SOS_token]]))  # SOS
    decoder_input = decoder_input.cuda() if use_cuda else decoder_input
    decoder_hidden = encoder_hidden
    decoded_words = []
    decoder_attentions = torch.zeros(max_length, max_length)
    if use_attn:
        for di in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            decoder_attentions[di] = decoder_attention.data
            topv, topi = decoder_output.data.topk(1)
            ni = topi[0][0]
            if ni == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(lang_dataset.output_lang.index2word[ni])
            decoder_input = Variable(torch.LongTensor([[ni]]))
            decoder_input = decoder_input.cuda() if use_cuda else decoder_input
    else:
        for di in range(max_length):
            decoder_output, decoder_hidden = decoder(decoder_input,
                                                     decoder_hidden)
            topv, topi = decoder_output.data.topk(1)
            ni = topi[0][0]
            if ni == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(lang_dataset.output_lang.index2word[ni])
            decoder_input = Variable(torch.LongTensor([[ni]]))
            decoder_input = decoder_input.cuda() if use_cuda else decoder_input
    if use_attn:
        return decoded_words, decoder_attentions[:di + 1]
    else:
        return decoded_words

def evaluateRandomly(encoder, decoder, n=100):
    for i in range(n):
        pair_idx = random.choice(list(range(len(lang_dataset))))
        pair = lang_dataset.pairs[pair_idx]
        in_lang, out_lang = lang_dataset[pair_idx]
        print('>', pair[0])
        print('=', pair[1])
        if use_attn:
            output_words, attentions = evaluate(encoder, decoder, in_lang)
        else:
            output_words = evaluate(encoder, decoder, in_lang)
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')
I was training a seq2seq model using a parallel corpus, and in the last few steps of evaluation I get a KeyError. I am not that proficient in TensorFlow and would love some advice.
Thank you.
I was trying to do formality style transfer on a toy txt file. We were implementing code from a GitHub repo to see if it works, and we have been dodging errors with some previous knowledge of machine learning. But this one error has me stumped, and it seems to be intrinsically about how TensorFlow works. We were expecting to get this:
< output sentence
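A minimal sketch (not from the repo) of how the vocabulary lookup is often guarded: convert the predicted index to a plain Python int before indexing index2word, and fall back to an unknown token if the id is missing. It assumes decoder_output, EOS_token, and lang_dataset are the same objects as in the code above and that index2word is a plain dict:

# Sketch only, using the same names as the evaluate() loop above.
topv, topi = decoder_output.data.topk(1)
ni = int(topi[0][0])  # plain int, so it matches the dict keys
if ni == EOS_token:
    decoded_words.append('<EOS>')
else:
    decoded_words.append(lang_dataset.output_lang.index2word.get(ni, '<UNK>'))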

Triplet-Loss using pre-trained network

I am trying to use the triplet loss technique to fine-tune an EfficientNet network for human Re-ID using Keras. Here is the code I am using:
This is the generator:
class SampleGen(object):
    def __init__(self, file_class_mapping):
        self.file_class_mapping = file_class_mapping
        self.class_to_list_files = defaultdict(list)
        self.list_all_files = list(file_class_mapping.keys())
        self.range_all_files = list(range(len(self.list_all_files)))
        for file, class_ in file_class_mapping.items():
            self.class_to_list_files[class_].append(file)
        self.list_classes = list(set(self.file_class_mapping.values()))
        self.range_list_classes = range(len(self.list_classes))
        self.class_weight = np.array([len(self.class_to_list_files[class_]) for class_ in self.list_classes])
        self.class_weight = self.class_weight / np.sum(self.class_weight)

    def get_sample(self):
        class_idx = np.random.choice(self.range_list_classes, 1, p=self.class_weight)[0]
        examples_class_idx = np.random.choice(range(len(self.class_to_list_files[self.list_classes[class_idx]])), 2)
        positive_example_1, positive_example_2 = \
            self.class_to_list_files[self.list_classes[class_idx]][examples_class_idx[0]], \
            self.class_to_list_files[self.list_classes[class_idx]][examples_class_idx[1]]
        negative_example = None
        while negative_example is None or self.file_class_mapping[negative_example] == \
                self.file_class_mapping[positive_example_1]:
            negative_example_idx = np.random.choice(self.range_all_files, 1)[0]
            negative_example = self.list_all_files[negative_example_idx]
        return positive_example_1, negative_example, positive_example_2

def read_and_resize(filepath):
    im = Image.open((filepath)).convert('RGB')
    im = im.resize((image_size, image_size))
    return np.array(im, dtype="float32")

def augment(im_array):
    if np.random.uniform(0, 1) > 0.9:
        im_array = np.fliplr(im_array)
    return im_array

def gen(triplet_gen):
    while True:
        list_positive_examples_1 = []
        list_negative_examples = []
        list_positive_examples_2 = []
        for i in range(batch_size):
            positive_example_1, negative_example, positive_example_2 = triplet_gen.get_sample()
            path_pos1 = join(path_train, positive_example_1)
            path_neg = join(path_train, negative_example)
            path_pos2 = join(path_train, positive_example_2)
            positive_example_1_img = read_and_resize(path_pos1)
            negative_example_img = read_and_resize(path_neg)
            positive_example_2_img = read_and_resize(path_pos2)
            positive_example_1_img = augment(positive_example_1_img)
            negative_example_img = augment(negative_example_img)
            positive_example_2_img = augment(positive_example_2_img)
            list_positive_examples_1.append(positive_example_1_img)
            list_negative_examples.append(negative_example_img)
            list_positive_examples_2.append(positive_example_2_img)
        A = preprocess_input(np.array(list_positive_examples_1))
        B = preprocess_input(np.array(list_positive_examples_2))
        C = preprocess_input(np.array(list_negative_examples))
        label = None
        yield {'anchor_input': A, 'positive_input': B, 'negative_input': C}, label
This is how I create the model:
def get_model():
    base_model = efn.EfficientNetB3(weights='imagenet', include_top=False)
    for layer in base_model.layers:
        layer.trainable = False
    x = base_model.output
    x = Dropout(0.6)(x)
    x = Dense(embedding_dim)(x)
    x = Lambda(lambda x: K.l2_normalize(x, axis=1), name="enc_out")(x)
    embedding_model = Model(base_model.input, x, name="embedding")

    input_shape = (image_size, image_size, 3)
    anchor_input = Input(input_shape, name='anchor_input')
    positive_input = Input(input_shape, name='positive_input')
    negative_input = Input(input_shape, name='negative_input')
    anchor_embedding = embedding_model(anchor_input)
    positive_embedding = embedding_model(positive_input)
    negative_embedding = embedding_model(negative_input)

    inputs = [anchor_input, positive_input, negative_input]
    outputs = [anchor_embedding, positive_embedding, negative_embedding]
    triplet_model = Model(inputs, outputs)
    triplet_model.add_loss(K.mean(triplet_loss(outputs)))
    return embedding_model, triplet_model
And this is how I'm trying to run the training:
if __name__ == '__main__':
    data = pd.read_csv(path_csv)
    train, test = train_test_split(data, train_size=0.7, random_state=1337)
    file_id_mapping_train = {k: v for k, v in zip(train.Image.values, train.Id.values)}
    file_id_mapping_test = {k: v for k, v in zip(test.Image.values, test.Id.values)}
    gen_tr = gen(SampleGen(file_id_mapping_train))
    gen_te = gen(SampleGen(file_id_mapping_test))
    embedding_model, triplet_model = get_model()
    for i, layer in enumerate(embedding_model.layers):
        print(i, layer.name, layer.trainable)
    for layer in embedding_model.layers[379:]:
        layer.trainable = True
    for layer in embedding_model.layers[:379]:
        layer.trainable = False
    triplet_model.compile(loss=None, optimizer=Adam(0.0001))
    history = triplet_model.fit(x=gen_tr,
                                validation_data=gen_te,
                                epochs=10,
                                verbose=1,
                                steps_per_epoch=200,
                                validation_steps=20,
                                callbacks=create_callbacks())
The csv contains two columns (Image, Id), and I am generating triplets on the fly using a generator. Layer 379 is the last layer of the network, so I leave just that one trainable. I let it run for some epochs and it doesn't seem to converge; the loss stays around 2.30. By around epoch 20 the loss is even higher than what I started with. Here you can see what I mean: train example. Is there anything wrong with the way I'm thinking about the problem?
Thank you!
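Since triplet_loss itself is not shown in the post, here is a minimal sketch of a standard margin-based triplet loss over the three embedding outputs, written with the Keras backend used in the model code above; the 0.2 margin is an assumption, not a value from the post:

from tensorflow.keras import backend as K

def triplet_loss_sketch(outputs, margin=0.2):
    # outputs = [anchor_embedding, positive_embedding, negative_embedding],
    # each already L2-normalized by the embedding model above.
    anchor, positive, negative = outputs
    pos_dist = K.sum(K.square(anchor - positive), axis=-1)
    neg_dist = K.sum(K.square(anchor - negative), axis=-1)
    # hinge: the positive should be closer than the negative by at least `margin`
    return K.maximum(pos_dist - neg_dist + margin, 0.0)

As a side note, with include_top=False the base model's output is a 4-D feature map, so a pooling layer such as GlobalAveragePooling2D usually sits before the Dense embedding layer.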

ValueError: No gradients provided for any variable tensorflow 2.0

I am using TensorFlow 2.0 and trying to write an actor-critic algorithm to play CartPole. I have done everything right, but I am getting the following error: ValueError: No gradients provided for any variable: ['dense/kernel:0', 'dense/bias:0', 'dense_1/kernel:0', 'dense_1/bias:0'].
Please help me out.
Here is my code:
import gym
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

MAX_EPISODES = 2000
GAMMA = 0.9
LR_A = 0.001
LR_C = 0.01

env = gym.make("CartPole-v0")
N_ACTIONS = env.action_space.n
N_FEATURES = 4

def make_actor(n_features, n_actions):
    inputs = tf.keras.Input(shape=[n_features])
    hidden = tf.keras.layers.Dense(20, activation=tf.nn.relu)(inputs)
    dist = tf.keras.layers.Dense(n_actions, activation=tf.nn.softmax)(hidden)
    model = tf.keras.Model(inputs=inputs, outputs=dist)
    return model

def make_critic(n_features):
    inputs = tf.keras.Input(shape=[n_features])
    hidden = tf.keras.layers.Dense(20, activation=tf.nn.relu)(inputs)
    value = tf.keras.layers.Dense(1)(hidden)
    model = tf.keras.Model(inputs=inputs, outputs=value)
    return model

actor = make_actor(N_FEATURES, N_ACTIONS)
critic = make_critic(N_FEATURES)
actor.summary()
critic.summary()

actor_optimizer = tf.keras.optimizers.Adam(LR_A)
critic_optimizer = tf.keras.optimizers.Adam(LR_C)

def loss_actor(s, a, td_error):
    dist = actor(s.reshape(1, 4)).numpy()
    log_prob = np.log(dist[0, a])
    exp_v = np.mean(log_prob * td_error)
    return tf.multiply(exp_v, -1)

def loss_critic(s, s_, r, gamma):
    s, s_ = s[np.newaxis, :], s_[np.newaxis, :]
    v = critic(s)
    v_ = critic(s_)
    td_error = r + gamma * v_ - v
    return tf.multiply(td_error, 1)

def train(max_episodes):
    for episode in range(max_episodes):
        s = env.reset().astype(np.float32)
        t = 0
        track_r = []
        while True:
            dist = actor(s.reshape(1, 4)).numpy()
            a = np.random.choice(range(N_ACTIONS), p=dist.ravel())
            s_, r, done, info = env.step(a)
            s_ = s_.astype(np.float32)
            if done: r = -20
            track_r.append(r)
            with tf.GradientTape() as cri_tape, tf.GradientTape() as act_tape:
                td_error = loss_critic(s, s_, r, GAMMA)
            gradient = cri_tape.gradient(td_error, critic.trainable_variables)
            critic_optimizer.apply_gradients(zip(gradient, critic.trainable_variables))
            with tf.GradientTape() as act_tape:
                neg_exp_v = loss_actor(s, a, td_error.numpy())
            gradient = act_tape.gradient(neg_exp_v, critic.trainable_variables)
            actor_optimizer.apply_gradients(zip(gradient, actor.trainable_variables))
            s = s_
            t += 1
            if done:
                print("Episode:{} Steps:{}".format(episode+1, t))

train(MAX_EPISODES)
The error is on line 69: actor_optimizer.apply_gradients(zip(gradient, actor.trainable_variables)).
When I tried to print out the gradients for the actor, the result was None.
I really don't see where the problem is.
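For comparison, here is a minimal sketch (using the same actor model and variable names as above) of an actor loss written entirely with TensorFlow ops, so that a GradientTape can trace it back to the actor's weights; any value that passes through .numpy() breaks that trace:

import tensorflow as tf

def loss_actor_tf(s, a, td_error):
    # Sketch only: keep everything as tensors so tf.GradientTape can
    # differentiate the result with respect to actor.trainable_variables.
    dist = actor(s.reshape(1, 4))                  # (1, n_actions) tensor
    log_prob = tf.math.log(dist[0, a] + 1e-8)      # log-probability of the taken action
    return -log_prob * tf.stop_gradient(td_error)  # treat td_error as a constant

# usage (sketch):
# with tf.GradientTape() as act_tape:
#     neg_exp_v = loss_actor_tf(s, a, td_error)
# grads = act_tape.gradient(neg_exp_v, actor.trainable_variables)
# actor_optimizer.apply_gradients(zip(grads, actor.trainable_variables))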

A2C is not working because the critic loss does not converge

I'm trying to do my own implementation of the Advantage Actor-Critic algorithm using TensorFlow. I used the code in https://github.com/BoYanSTKO/Practical_RL-coursera/blob/master/week5_policy_based/practice_a3c.ipynb as a rough template for how I should write the algorithm.
I tried it on the simple CartPole-v0 gym environment, but my implementation fails badly. The critic's loss just explodes and becomes way too large, while the actor's loss is rather low.
I'm not sure what I'm doing wrong here. Any help? :)
I've tried separating the actor and critic into two different networks; this did not help either. I have also tried fine-tuning things like gamma and the learning rate, without any success.
#!/usr/bin/python
import tensorflow as tf
import numpy as np
import gym
import random
from tensorboardX import SummaryWriter

class ActorCritic():
    def __init__(self, state_dim, n_actions, learning_rate, gamma=0.99):
        with tf.variable_scope("ActorCritic"):
            self.states_ph = tf.placeholder(tf.float32, (None, state_dim), name="states")
            self.action_ph = tf.placeholder(tf.int32, (None,), name="actions")
            self.n_actions = n_actions
            self.reward_ph = tf.placeholder(tf.float32, (None,), name="rewards")
            self.next_state_values = tf.placeholder(tf.float32, (None,), name="rewards")
            self.is_done_ph = tf.placeholder(tf.float32, (None,), name="rewards")

            net = tf.layers.dense(self.states_ph, 24, activation=tf.nn.relu)
            self.logits = tf.layers.dense(net, n_actions, activation=None)
            self.state_values = tf.layers.dense(net, 1, activation=None)

            self.action_probs = tf.nn.softmax(self.logits)
            self.log_prob = tf.nn.log_softmax(self.logits)
            self.entropy = -tf.reduce_sum(self.action_probs*self.log_prob, axis=-1, name="entropy")
            self.logp_actions = tf.reduce_sum(self.log_prob*tf.one_hot(self.action_ph, depth=n_actions), axis=-1)

            self.target_state_values = self.reward_ph + gamma*(1.0-self.is_done_ph)*self.next_state_values
            self.advantage = self.target_state_values - self.state_values
            self.actor_loss = -tf.reduce_mean(self.logp_actions * tf.stop_gradient(self.advantage)) - 0.01*tf.reduce_mean(self.entropy)
            self.critic_loss = tf.reduce_mean(self.advantage**2.0)
            self.train_opt = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.actor_loss+self.critic_loss)

    def train(self, states, actions, rewards, is_done, nxt_state_values_batch):
        sess = tf.get_default_session()
        return sess.run([self.critic_loss, self.actor_loss, self.train_opt], feed_dict={
            self.next_state_values: nxt_state_values_batch,
            self.states_ph: states,
            self.action_ph: actions,
            self.reward_ph: rewards,
            self.is_done_ph: is_done})

    def predict_state_values(self, states):
        sess = tf.get_default_session()
        return sess.run(self.state_values, feed_dict={self.states_ph: states})

    def sample_actions(self, states):
        sess = tf.get_default_session()
        action_probs = sess.run(self.action_probs, {self.states_ph: states})
        return [np.random.choice(range(self.n_actions), p=action_prob) for action_prob in action_probs]

class EnvBatch():
    def __init__(self, env_name, n_envs):
        self.envs = [gym.make(env_name) for env in range(n_envs)]
        self.n_actions = self.envs[0].action_space.n
        self.state_dim = self.envs[0].observation_space.shape[0]

    def reset(self):
        return [env.reset().tolist() for env in self.envs]

    def step(self, actions):
        states_batch, rewards_batch, is_done_batch = [], [], []
        for action, env in zip(actions, self.envs):
            s, r, d, _ = env.step(action)
            if d:
                s = env.reset()
            states_batch.append(s)
            rewards_batch.append(r)
            is_done_batch.append(d)
        return np.array(states_batch), np.array(rewards_batch), np.array(is_done_batch)

def evaluate_performance(env_name, agent, nr_runs=10):
    env = gym.make(env_name)
    rewards = []
    for _ in range(nr_runs):
        state = env.reset()
        is_done = False
        acc_reward = 0.0
        while not is_done:
            action = agent.sample_actions([state])
            nxt_state, reward, is_done, _ = env.step(action[0])
            state = nxt_state
            acc_reward += reward
        rewards.append(acc_reward)
    return np.mean(rewards)

tf.reset_default_graph()
env = EnvBatch("CartPole-v0", 10)
agent = ActorCritic(env.state_dim, env.n_actions, learning_rate=0.001)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

state_batch = env.reset()
writer = SummaryWriter()
for i in range(100000):
    actions = agent.sample_actions(state_batch)
    nxt_state_batch, rewards_batch, is_done_batch = env.step(actions)
    nxt_state_values = agent.predict_state_values(nxt_state_batch).ravel()
    critic_loss, actor_loss, _ = agent.train(state_batch, actions, rewards_batch, is_done_batch, nxt_state_values)
    writer.add_scalar("actor_loss", actor_loss, i)
    writer.add_scalar("critic_loss", critic_loss, i)
    if i % 50 == 0:
        test_reward = evaluate_performance("CartPole-v0", agent)
        writer.add_scalar("test_reward", test_reward, i)
        if test_reward > 195:
            print "Done!"
    states_batch = nxt_state_batch

sess.close()
writer.close()
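For what it's worth, one common way to keep a shared actor-critic update from being dominated by an exploding critic term is to down-weight the critic loss and clip gradients; below is a minimal TF1-style sketch under the same names as the __init__ above (the 0.5 weight and 5.0 clip norm are assumptions, not values from the post):

# Sketch only, as a replacement for the single minimize() call in __init__.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
total_loss = self.actor_loss + 0.5 * self.critic_loss   # down-weight the critic term
grads_and_vars = optimizer.compute_gradients(total_loss)
clipped = [(tf.clip_by_norm(g, 5.0), v) for g, v in grads_and_vars if g is not None]
self.train_opt = optimizer.apply_gradients(clipped)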