PPO: NaN Policy return in Tensorflow Keras - tensorflow

I am trying to implement the PPO algorithm with clipped loss in addition to KL penalties and run training on Mujuco Gym environments. After ~ 15000 gradient steps, policy collapses into returning NaN.
These are the policy training info before the policy collapses:
A: tf.Tensor(-0.10426917, shape=(), dtype=float32)
LOG_A: tf.Tensor(37.021107, shape=(), dtype=float32)
LOSS: tf.Tensor(0.16812761, shape=(), dtype=float32)
GRAD: tf.Tensor(
[[-3.4624012e-04 -1.2807851e-04 -1.9778654e-01 ... -2.7586846e+00
-1.2552655e-01 -1.7212760e-03]
[ 4.6312678e-05 -2.2251482e-04 5.5088173e-03 ... 9.5249921e-02
2.2186586e-03 2.0080474e-04]
[ 2.0314787e-05 -1.6381161e-04 7.1509695e-03 ... 1.1740552e-01
3.4010289e-03 1.2105847e-04]
...
[ 1.7827883e-04 -1.1712313e-05 5.8873045e-01 ... 9.2354174e+00
2.9186043e-01 -2.2818900e-03]
[-9.0385452e-05 3.0951984e-03 -3.6487404e-02 ... -2.6829168e-01
-3.9602429e-02 2.0654879e-03]
[ 2.2925157e-04 4.6892464e-03 5.9946489e-01 ... 9.3497839e+00
3.0514282e-01 -1.3834883e-03]], shape=(11, 256), dtype=float32)
A: tf.Tensor(nan, shape=(), dtype=float32)
LOG_A: tf.Tensor(nan, shape=(), dtype=float32)
Note: The gradient info captures only the gradients of the first layer, as I have found capturing all gradient info to be messy and seemingly redundant.
What I have tried:
Tuning hyperparameters: I have tried multiple sets of hyperparameters including the one documented in the original paper. The same error occurs(the hyperparams setup provided in the example below are chosen for higher sampling efficiency for faster debugging).
Gradient clipping: Gradient norm has been clipped to be unitary, and as shown above, it does not appear to have the exploding gradient issue.
Guaranteed numerical stability of tanh squashing of policy log probability: A small epsilon was used to clip the sum of squares so that action log probability does not return inf after tanh squashing.
Unitized code example:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import gym
import scipy.signal
import time
from tensorflow.keras import Model
import matplotlib.pyplot as plt
import random
import tensorflow_probability as tfp
tf.keras.backend.set_floatx('float32')
EPSILON = 1e-10
################## GLOBAL SETUP P1 ##################
problem = "Hopper-v2"
env = gym.make(problem)
eval_env = gym.make(problem)
num_states = env.observation_space.shape[0]
print("Size of State Space -> {}".format(num_states), flush=True)
num_actions = env.action_space.shape[0]
print("Size of Action Space -> {}".format(num_actions), flush=True)
upper_bound = env.action_space.high[0]
lower_bound = env.action_space.low[0]
print("Max Value of Action -> {}".format(upper_bound), flush=True)
print("Min Value of Action -> {}".format(lower_bound), flush=True)
minibatch_size = 256
##########*****####################*****##########
#################### Auxiliaries ####################
def discounted_cumulative_sums(x, discount):
# Discounted cumulative sums of vectors for computing rewards-to-go and advantage estimates
return scipy.signal.lfilter([1], [1, float(-discount)], x[::-1], axis=0)[::-1]
##########*****####################*****##########
#################### Replay Buffer ####################
class Buffer:
def __init__(self, observation_dimensions, action_dimensions, size, gamma=0.99, lam=0.95):
self.observation_buffer = np.zeros(
(size, observation_dimensions), dtype=np.float32
)
self.action_buffer = np.zeros((size, action_dimensions), dtype=np.int32)
self.advantage_buffer = np.zeros(size, dtype=np.float32)
self.reward_buffer = np.zeros(size, dtype=np.float32)
self.return_buffer = np.zeros(size, dtype=np.float32)
self.value_buffer = np.zeros(size, dtype=np.float32)
self.logprobability_buffer = np.zeros(size, dtype=np.float32)
self.gamma, self.lam = gamma, lam
self.pointer, self.trajectory_start_index = 0, 0
def store(self, observation, action, reward, value, logprobability):
self.observation_buffer[self.pointer] = observation
self.action_buffer[self.pointer] = action
self.reward_buffer[self.pointer] = reward
self.value_buffer[self.pointer] = value
self.logprobability_buffer[self.pointer] = logprobability
self.pointer += 1
def finish_trajectory(self, last_value=0):
path_slice = slice(self.trajectory_start_index, self.pointer)
rewards = np.append(self.reward_buffer[path_slice], last_value)
values = np.append(self.value_buffer[path_slice], last_value)
deltas = rewards[:-1] + self.gamma * values[1:] - values[:-1]
self.advantage_buffer[path_slice] = discounted_cumulative_sums(
deltas, self.gamma * self.lam
)
self.return_buffer[path_slice] = discounted_cumulative_sums(
rewards, self.gamma
)[:-1]
self.trajectory_start_index = self.pointer
def get(self):
# Get all data of the buffer and normalize the advantages
rindex = np.random.choice(self.pointer, minibatch_size)
advantage_mean, advantage_std = (
np.mean(self.advantage_buffer[rindex]),
np.std(self.advantage_buffer[rindex]),
)
return (
self.observation_buffer[rindex],
self.action_buffer[rindex],
(self.advantage_buffer[rindex] - advantage_mean) / advantage_std,
self.return_buffer[rindex],
self.logprobability_buffer[rindex],
)
def clear(self):
self.pointer, self.trajectory_start_index = 0, 0
##########*****####################*****##########
#################### Models ####################
class Actor(Model):
def __init__(self):
super().__init__()
self.action_dim = num_actions
self.dense1_layer = layers.Dense(256, activation="relu")
self.dense2_layer = layers.Dense(256, activation="relu")
self.mean_layer = layers.Dense(self.action_dim)
self.stdev_layer = layers.Dense(self.action_dim)
def call(self, state, eval_mode=False):
a1 = self.dense1_layer(state)
a2 = self.dense2_layer(a1)
mu = self.mean_layer(a2)
log_sigma = self.stdev_layer(a2)
sigma = tf.exp(log_sigma)
covar_m = tf.linalg.diag(sigma**2)
dist = tfp.distributions.MultivariateNormalTriL(loc=mu, scale_tril=tf.linalg.cholesky(covar_m))
if eval_mode:
action_ = mu
else:
action_ = dist.sample()
action = tf.tanh(action_)
log_pi_ = dist.log_prob(action_)
log_pi = log_pi_ - tf.reduce_sum(tf.math.log(tf.clip_by_value(1 - action**2, EPSILON, 1.0)), axis=1)
return action*upper_bound, log_pi
def get_critic():
state_input = layers.Input(shape=(num_states))
state_out = layers.Dense(256, activation="relu")(state_input)
out = layers.Dense(256, activation="relu")(state_out)
outputs = layers.Dense(1, dtype='float32')(out)
model = tf.keras.Model(state_input, outputs)
return model
##########*****####################*****##########
#################### GLOBAL SETUP P2 ####################
# Hyperparameters of the PPO algorithm
horizon = 2048
iterations = 2000
gamma = 0.99
clip_ratio = 0.2
epochs = 500
lam = 0.97
target_kl = 0.01
beta = 1.0
render = False
actor_model = Actor()
critic_model = get_critic()
lr = 0.0003
policy_optimizer = tf.keras.optimizers.Adam(learning_rate=lr,
# )
clipnorm=1.0)
value_optimizer = tf.keras.optimizers.Adam(learning_rate=lr,
# )
clipnorm=1.0)
buffer = Buffer(num_states, num_actions, horizon)
##########*****####################*****##########
#################### Training ####################
observation, episode_return, episode_length = env.reset(), 0, 0
tf_observation = tf.expand_dims(observation, 0)
def train_policy(
observation_buffer, action_buffer, logprobability_buffer, advantage_buffer
):
global beta
with tf.GradientTape() as tape: # Record operations for automatic differentiation.
action, log_a = actor_model(observation_buffer)
# print("A: ", tf.reduce_mean(action))
# print("LOG_A: ", tf.reduce_mean(log_a))
ratio = tf.exp(
log_a
- logprobability_buffer
)
# print("R: ", tf.reduce_mean(ratio), flush=True)
cd_ratio = tf.clip_by_value(ratio, (1 - clip_ratio), (1 + clip_ratio))
min_advantage = cd_ratio * advantage_buffer
_kl = -beta*tf.math.reduce_max(logprobability_buffer - log_a)
policy_loss = -tf.reduce_mean(tf.minimum(ratio * advantage_buffer, min_advantage) + _kl)
# print("LOSS: ", policy_loss)
policy_grads = tape.gradient(policy_loss, actor_model.trainable_variables)
policy_optimizer.apply_gradients(zip(policy_grads, actor_model.trainable_variables))
# print("GRAD: ", policy_grads[0], flush=True)
action_opt, log_a_opt = actor_model(observation_buffer)
kl = tf.reduce_mean(
logprobability_buffer
- log_a_opt
)
if kl < target_kl/1.5:
beta = beta/2
if kl > target_kl*1.5:
beta = beta*2
return kl
def train_value_function(observation_buffer, return_buffer):
with tf.GradientTape() as tape: # Record operations for automatic differentiation.
value_loss = tf.reduce_mean((return_buffer - critic_model(observation_buffer)) ** 2)
value_grads = tape.gradient(value_loss, critic_model.trainable_variables)
value_optimizer.apply_gradients(zip(value_grads, critic_model.trainable_variables))
for ite in range(iterations):
for t in range(horizon):
if render:
env.render()
action, log_pi_a = actor_model(tf_observation)
action = action[0]
observation_new, reward, done, _ = env.step(action)
episode_return += reward
episode_length += 1
value_t = critic_model(tf_observation)
buffer.store(observation, action, reward, value_t, log_pi_a)
observation = observation_new
tf_observation = tf.expand_dims(observation, 0)
terminal = done
if terminal or (t == horizon - 1):
last_value = 0 if done else critic_model(tf_observation)
buffer.finish_trajectory(last_value)
observation, episode_return, episode_length = env.reset(), 0, 0
tf_observation = tf.expand_dims(observation, 0)
for _ in range(epochs):
(
observation_buffer,
action_buffer,
advantage_buffer,
return_buffer,
logprobability_buffer,
) = buffer.get()
kl = train_policy(
observation_buffer, action_buffer, logprobability_buffer, advantage_buffer
)
train_value_function(observation_buffer, return_buffer)
buffer.clear()
##########*****####################*****##########
Note:
The code base is constructed by a combination of a modified version of the official keras PPO tutorial(https://keras.io/examples/rl/ppo_cartpole/) and Modules(Mainly the policy network) that have been tested in other implementations.
I refrained from using tf_function declaration as I am very new to tensorflow, thus not understanding its impact, and I have read from various github issues that sometimes such declaration causes numerical instability due to caching. However, it could be a source of my issues.
Any help is appreciated, and apologies if something is missing or unclear.

Related

Convert Tensorflow 1.x code with custom loss into 2.x

Suppose I have the following code written in Tensorflow 1.x where I define custom loss function. I wish to remove .compat.v1., Session, placeholder etc. and convert it into Tensorflow 2.x.
How to do so?
import DGM
import tensorflow as tf
import numpy as np
import scipy.stats as spstats
import matplotlib.pyplot as plt
from tqdm.notebook import trange
# Option parameters
phi = 10
n = 0.01
T = 4
# Solution parameters (domain on which to solve PDE)
t_low = 0.0 - 1e-10
x_low = 0.0 + 1e-10
x_high = 1.0
# neural network parameters
num_layers = 3
nodes_per_layer = 50
# Training parameters
sampling_stages = 2500 # number of times to resample new time-space domain points
steps_per_sample = 20 # number of SGD steps to take before re-sampling
# Sampling parameters
nsim_interior = 100
nsim_boundary_1 = 50
nsim_boundary_2 = 50
nsim_initial = 50
x_multiplier = 1.1 # multiplier for oversampling i.e. draw x from [x_low, x_high * x_multiplier]
def sampler(nsim_interior, nsim_boundary_1, nsim_boundary_2, nsim_initial):
''' Sample time-space points from the function's domain; points are sampled
uniformly on the interior of the domain, at the initial/terminal time points
and along the spatial boundary at different time points.
Args:
nsim_interior: number of space points in the interior of U
nsim_boundary_1: number of space points in the boundary of U
nsim_boundary_2: number of space points in the boundary of U_x
nsim_initial: number of space points at the initial time
'''
# Sampler #1: domain interior
t_interior = np.random.uniform(low=t_low, high=T, size=[nsim_interior, 1])
x_interior = np.random.uniform(low=x_low, high=x_high*x_multiplier, size=[nsim_interior, 1])
# Sampler #2: spatial boundary 1
t_boundary_1 = np.random.uniform(low=t_low, high=T, size=[nsim_boundary_1, 1])
x_boundary_1 = np.ones((nsim_boundary_1, 1))
# Sampler #3: spatial boundary 2
t_boundary_2 = np.random.uniform(low=t_low, high=T, size=[nsim_boundary_2, 1])
x_boundary_2 = np.zeros((nsim_boundary_2, 1))
# Sampler #4: initial condition
t_initial = np.zeros((nsim_initial, 1))
x_initial = np.random.uniform(low=x_low, high=x_high*x_multiplier, size=[nsim_initial, 1])
return (
t_interior, x_interior,
t_boundary_1, x_boundary_1,
t_boundary_2, x_boundary_2,
t_initial, x_initial
)
def loss(
model,
t_interior, x_interior,
t_boundary_1, x_boundary_1,
t_boundary_2, x_boundary_2,
t_initial, x_initial
):
''' Compute total loss for training.
Args:
model: DGM model object
t_interior, x_interior: sampled time / space points in the interior of U
t_boundary_1, x_boundary_1: sampled time / space points in the boundary of U
t_boundary_2, x_boundary_2: sampled time / space points in the boundary of U_x
t_initial, x_initial: sampled time / space points at the initial time
'''
# Loss term #1: PDE
# compute function value and derivatives at current sampled points
u = model(t_interior, x_interior)
u_t = tf.gradients(ys=u, xs=t_interior)[0]
u_x = tf.gradients(ys=u, xs=x_interior)[0]
u_xx = tf.gradients(ys=u_x, xs=x_interior)[0]
diff_u = u_t - u_xx + phi**2 * (tf.nn.relu(u) + 1e-10)**n
# compute average L2-norm for the PDE
L1 = tf.reduce_mean(input_tensor=tf.square(diff_u))
# Loss term #2: First b. c.
u = model(t_boundary_1, x_boundary_1)
bc1_error = u - 1
# Loss term #3: Second b. c.
u = model(t_boundary_2, x_boundary_2)
u_x = tf.gradients(ys=u, xs=x_boundary_2)[0]
bc2_error = u_x - 0
# Loss term #3: Initial condition
u = model(t_initial, x_initial)
init_error = u - 1
# compute average L2-norm for the initial/boundary conditions
L2 = tf.reduce_mean(input_tensor=tf.square(bc1_error + bc2_error + init_error))
return L1, L2
# initialize DGM model (last input: space dimension = 1)
model = DGM.DGMNet(nodes_per_layer, num_layers, 1)
# tensor placeholders (_tnsr suffix indicates tensors)
# inputs (time, space domain interior, space domain at initial time)
t_interior_tnsr = tf.compat.v1.placeholder(tf.float32, [None,1])
x_interior_tnsr = tf.compat.v1.placeholder(tf.float32, [None,1])
t_boundary_1_tnsr = tf.compat.v1.placeholder(tf.float32, [None,1])
x_boundary_1_tnsr = tf.compat.v1.placeholder(tf.float32, [None,1])
t_boundary_2_tnsr = tf.compat.v1.placeholder(tf.float32, [None,1])
x_boundary_2_tnsr = tf.compat.v1.placeholder(tf.float32, [None,1])
t_initial_tnsr = tf.compat.v1.placeholder(tf.float32, [None,1])
x_initial_tnsr = tf.compat.v1.placeholder(tf.float32, [None,1])
# loss
L1_tnsr, L2_tnsr = loss(
model,
t_interior_tnsr, x_interior_tnsr,
t_boundary_1_tnsr, x_boundary_1_tnsr,
t_boundary_2_tnsr, x_boundary_2_tnsr,
t_initial_tnsr, x_initial_tnsr
)
loss_tnsr = L1_tnsr + L2_tnsr
# set optimizer
starting_learning_rate = 3e-4
global_step = tf.Variable(0, trainable=False)
lr = tf.compat.v1.train.exponential_decay(
learning_rate=starting_learning_rate,
global_step=global_step,
decay_steps=1e5,
decay_rate=0.96,
staircase=True,
)
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=lr).minimize(loss_tnsr)
# initialize variables
init_op = tf.compat.v1.global_variables_initializer()
# open session
sess = tf.compat.v1.Session()
sess.run(init_op)
try:
model.load_weights("checkpoint/")
print("Loading from checkpoint.")
except:
print("Checkpoint not found.")
# for each sampling stage
for i in trange(sampling_stages):
# sample uniformly from the required regions
t_interior, x_interior, \
t_boundary_1, x_boundary_1, \
t_boundary_2, x_boundary_2, \
t_initial, x_initial = sampler(
nsim_interior, nsim_boundary_1, nsim_boundary_2, nsim_initial
)
# for a given sample, take the required number of SGD steps
for _ in range(steps_per_sample):
loss, L1, L2, _ = sess.run(
[loss_tnsr, L1_tnsr, L2_tnsr, optimizer],
feed_dict = {
t_interior_tnsr: t_interior,
x_interior_tnsr: x_interior,
t_boundary_1_tnsr: t_boundary_1,
x_boundary_1_tnsr: x_boundary_1,
t_boundary_2_tnsr: t_boundary_2,
x_boundary_2_tnsr: x_boundary_2,
t_initial_tnsr: t_initial,
x_initial_tnsr: x_initial,
}
)
if i % 10 == 0:
print(f"Loss: {loss:.5f},\t L1: {L1:.5f},\t L2: {L2:.5f},\t iteration: {i}")
model.save_weights("checkpoint/")
I tried searching how to implement custom loss functions with model as an argument, but couldn't implement it.
For model.compile there is a loss argument for which you can pass the Loss function. May be a string (name of loss function), or a tf.keras.losses.Loss instance. For example
Model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
loss=tf.keras.losses.BinaryCrossentropy())
If you have created your custom loss function you can also pass that loss function to the loss argument by providing the name of that loss function. For example
def my_loss_fn(y_true, y_pred):
squared_difference = tf.square(y_true - y_pred)
return tf.reduce_mean(squared_difference, axis=-1)
model.compile(optimizer='adam', loss=my_loss_fn)
Thank You.

TypeError when trying to make a loop creating artificial neural networks

I am working on an artifical neural network which I have created via subclassing.
The subclassing looks like this:
import time
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import scipy.stats as si
import sympy as sy
from sympy.stats import Normal, cdf
from sympy import init_printing
class DGMNet(tf.keras.Model):
def __init__(self, n_layers, n_nodes, dimensions=1):
"""
Parameters:
- n_layers: number of layers
- n_nodes: number of nodes in (inner) layers
- dimensions: number of spacial dimensions
"""
super().__init__()
self.n_layers = n_layers
self.initial_layer = DenseLayer(dimensions + 1, n_nodes, activation="relu")
self.lstmlikelist = []
for _ in range(self.n_layers):
self.lstmlikelist.append(LSTMLikeLayer(dimensions + 1, n_nodes, activation="relu"))
self.final_layer = DenseLayer(n_nodes, 1, activation=None)
def call(self, t, x):
X = tf.concat([t,x], 1)
S = self.initial_layer.call(X)
for i in range(self.n_layers):
S = self.lstmlikelist[i].call({'S': S, 'X': X})
result = self.final_layer.call(S)
return result
class DenseLayer(tf.keras.layers.Layer):
def __init__(self, n_inputs, n_outputs, activation):
"""
Parameters:
- n_inputs: number of inputs
- n_outputs: number of outputs
- activation: activation function
"""
super(DenseLayer, self).__init__()
self.n_inputs = n_inputs
self.n_outputs = n_outputs
self.W = self.add_weight(shape=(self.n_inputs, self.n_outputs),
initializer='random_normal',
trainable=True)
self.b = self.add_weight(shape=(1, self.n_outputs),
initializer='random_normal',
trainable=True)
self.activation = _get_function(activation)
def call(self, inputs):
S = tf.add(tf.matmul(inputs, self.W), self.b)
S = self.activation(S)
return S
class LSTMLikeLayer(tf.keras.layers.Layer):
def __init__(self, n_inputs, n_outputs, activation):
"""
Parameters:
- n_inputs: number of inputs
- n_outputs: number of outputs
- activation: activation function
"""
super(LSTMLikeLayer, self).__init__()
self.n_outputs = n_outputs
self.n_inputs = n_inputs
self.Uz = self.add_variable("Uz", shape=[self.n_inputs, self.n_outputs])
self.Ug = self.add_variable("Ug", shape=[self.n_inputs, self.n_outputs])
self.Ur = self.add_variable("Ur", shape=[self.n_inputs, self.n_outputs])
self.Uh = self.add_variable("Uh", shape=[self.n_inputs, self.n_outputs])
self.Wz = self.add_variable("Wz", shape=[self.n_outputs, self.n_outputs])
self.Wg = self.add_variable("Wg", shape=[self.n_outputs, self.n_outputs])
self.Wr = self.add_variable("Wr", shape=[self.n_outputs, self.n_outputs])
self.Wh = self.add_variable("Wh", shape=[self.n_outputs, self.n_outputs])
self.bz = self.add_variable("bz", shape=[1, self.n_outputs])
self.bg = self.add_variable("bg", shape=[1, self.n_outputs])
self.br = self.add_variable("br", shape=[1, self.n_outputs])
self.bh = self.add_variable("bh", shape=[1, self.n_outputs])
self.activation = _get_function(activation)
def call(self, inputs):
S = inputs['S']
X = inputs['X']
Z = self.activation(tf.add(tf.add(tf.matmul(X, self.Uz), tf.matmul(S, self.Wz)), self.bz))
G = self.activation(tf.add(tf.add(tf.matmul(X, self.Ug), tf.matmul(S, self.Wg)), self.bg))
R = self.activation(tf.add(tf.add(tf.matmul(X, self.Ur), tf.matmul(S, self.Wr)), self.br))
H = self.activation(tf.add(tf.add(tf.matmul(X, self.Uh), tf.matmul(tf.multiply(S, R), self.Wh)), self.bh))
Snew = tf.add(tf.multiply(tf.subtract(tf.ones_like(G), G), H), tf.multiply(Z, S))
return Snew
def _get_function(name):
f = None
if name == "tanh":
f = tf.nn.tanh
elif name == "sigmoid":
f = tf.nn.sigmoid
elif name == "relu":
f = tf.nn.relu
elif not name:
f = tf.identity
assert f is not None
return f
# Sampling
def sampler(N1, N2, N3):
np.random.seed(42)
# Sampler #1: PDE domain
t1 = np.random.uniform(low=T0,
high=T,
size=[N1,1])
s1 = np.random.uniform(low=S1,
high=S2,
size=[N1,1])
# Sampler #2: boundary condition
t2 = np.zeros(shape=(1, 1))
s2 = np.zeros(shape=(1, 1))
# Sampler #3: initial/terminal condition
t3 = T * np.ones((N3,1)) #Terminal condition
s3 = np.random.uniform(low=S1,
high=S2,
size=[N3,1])
return (t1, s1, t2, s2, t3, s3)
# Loss function
def loss(model, t1, x1, t2, x2, t3, x3):
# Loss term #1: PDE
V = model(t1, x1)
V_t = tf.gradients(V, t1)[0]
V_x = tf.gradients(V, x1)[0]
V_xx = tf.gradients(V_x, x1)[0]
f = V_t + r*x1*V_x + 0.5*sigma**2*x1**2*V_xx - r*V
L1 = tf.reduce_mean(tf.square(f))
# Loss term #2: boundary condition
#L2 = tf.reduce_mean(tf.square(V))
# Loss term #3: initial/terminal condition
L3 = tf.reduce_mean(tf.square(model(t3, x3) - tf.math.maximum(x3-K,0)))
return (L1, L3)
# B-S's analytical known solution
def analytical_solution(t, x):
#C = SN(d1) - Xe- rt N(d2)
#S: spot price
#K: strike price
#T: time to maturity
#r: interest rate
#sigma: volatility of underlying asset
d1 = (np.log(x / K) + (r + 0.5 * sigma ** 2) * T) / (sigma * np.sqrt(T))
d2 = (np.log(x / K) + (r - 0.5 * sigma ** 2) * T) / (sigma * np.sqrt(T))
call = (x * si.norm.cdf(d1, 0.0, 1.0) - K * np.exp(-r * T) * si.norm.cdf(d2, 0.0, 1.0))
return call
# Set random seeds
np.random.seed(42)
tf.random.set_seed(42)
# Strike price
K = 0.5
# PDE parameters
r = 0.05 # Interest rate
sigma = 0.25 # Volatility
# Time limits
T0 = 0.0 + 1e-10 # Initial time
T = 1.0 # Terminal time
# Space limits
S1 = 0.0 + 1e-10 # Low boundary
S2 = 1.0 # High boundary
# Number of samples
NS_1 = 1000
NS_2 = 0
NS_3 = 100
t1, s1, t2, s2, t3, s3 = sampler(NS_1, NS_2, NS_3)
Now what I want to do is to iterate over different parameters and create a new ann for each iteration.
My plan was to do it in this way:
tf.compat.v1.disable_eager_execution()
t1_t = tf.compat.v1.placeholder(tf.float32, [None,1])
x1_t = tf.compat.v1.placeholder(tf.float32, [None,1])
t2_t = tf.compat.v1.placeholder(tf.float32, [None,1])
x2_t = tf.compat.v1.placeholder(tf.float32, [None,1])
t3_t = tf.compat.v1.placeholder(tf.float32, [None,1])
x3_t = tf.compat.v1.placeholder(tf.float32, [None,1])
volatility_list = [0.08]#[0.08, 0.16, 0.18, 0.2, 0.28]
stages_list = [10]#, 50, 100]
layers_list = [3]#, 5, 7]
npl_list = [3]#, 6, 9, 12, 15]
for sigma in volatility_list:
for st in stages_list:
for lay in layers_list:
for npl in npl_list:
# Neural Network definition
num_layers = lay
nodes_per_layer = npl
ann = DGMNet(num_layers, nodes_per_layer)
L1_t, L3_t = loss(ann, t1_t, x1_t, t2_t, x2_t, t3_t, x3_t)
loss_t = L1_t + L3_t
# Optimizer parameters
global_step = tf.Variable(1, trainable=False)
starter_learning_rate = 0.001
learning_rate = tf.compat.v1.train.exponential_decay(starter_learning_rate, global_step,
100000, 0.96, staircase=True)
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss_t)
# Training parameters
steps_per_sample = st
sampling_stages = 100#2000
# Plot tensors
tplot_t = tf.compat.v1.placeholder(tf.float32, [None,1], name="tplot_t") # We name to recover it later
xplot_t = tf.compat.v1.placeholder(tf.float32, [None,1], name="xplot_t")
vplot_t = tf.identity(ann(tplot_t, xplot_t), name="vplot_t") # Trick for naming the trained model
# Training data holders
sampling_stages_list = []
elapsed_time_list = []
loss_list = []
L1_list = []
L3_list = []
# Train network!!
init_op = tf.compat.v1.global_variables_initializer()
sess = tf.compat.v1.Session()
sess.run(init_op)
for i in range(sampling_stages):
t1, x1, t2, x2, t3, x3 = sampler(NS_1, NS_2, NS_3)
start_time = time.clock()
for _ in range(steps_per_sample):
loss, L1, L3, _ = sess.run([loss_t, L1_t, L3_t, optimizer],
feed_dict = {t1_t:t1, x1_t:x1, t2_t:t2, x2_t:x2, t3_t:t3, x3_t:x3})
end_time = time.clock()
elapsed_time = end_time - start_time
sampling_stages_list.append(i)
elapsed_time_list.append(elapsed_time)
loss_list.append(loss)
L1_list.append(L1)
L3_list.append(L3)
text = "Stage: {:04d}, Loss: {:e}, L1: {:e}, L3: {:e}, {:f} seconds".format(i, loss, L1, L3, elapsed_time)
print(text)
#goodness of fit
time_0 = 0
listofzeros = [time_0] * 100
prices_for_goodness = np.linspace(S1,S2, 100)
goodness_list = []
solution_goodness = analytical_solution(listofzeros, prices_for_goodness)
ttt = time_0*np.ones_like(prices_for_goodness.reshape(-1,1))
nn_goodness, = sess.run([vplot_t],
feed_dict={tplot_t:ttt, xplot_t:prices_for_goodness.reshape(-1,1)})
deviation_list = np.abs(solution_goodness - nn_goodness)/(T-T0)
print("{0:.2f}%".format(np.average(deviation_list)*100))
Unfortunately as soon as it ends the first iteration I get a TypeError that 'numpy.float32' object is not callable
Error Traceback:
TypeError Traceback (most recent call last)
<ipython-input-14-bb14643d0c42> in <module>()
10
11
---> 12 L1_t, L3_t = loss(ann, t1_t, x1_t, t2_t, x2_t, t3_t, x3_t)
13 loss_t = L1_t + L3_t
14
TypeError: 'numpy.float32' object is not callable
I guess that the problem is with the creation of the placeholders, however I am not sure how to solve it. Maybe one of you can help me
Thanks in advance!
Chris
Did you create a variable called 'loss'? It seems that the loss function is redefined by a variable with the same name, so then python tries to call that variable as a function.

ValueError: No gradients provided for any variable tensorflow 2.0

I am using tensorflow 2.0 and trying to make a actor critic algorithm to play the game of cartpole. I have done everything right but getting the following error: ValueError: No gradients provided for any variable: ['dense/kernel:0', 'dense/bias:0', 'dense_1/kernel:0', 'dense_1/bias:0'].
Please help me out
Here is my code:
import gym
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
MAX_EPISODES = 2000
GAMMA = 0.9
LR_A = 0.001
LR_C = 0.01
env = gym.make("CartPole-v0")
N_ACTIONS = env.action_space.n
N_FEATURES = 4
def make_actor(n_features, n_actions):
inputs = tf.keras.Input(shape=[n_features])
hidden = tf.keras.layers.Dense(20, activation=tf.nn.relu)(inputs)
dist = tf.keras.layers.Dense(n_actions, activation=tf.nn.softmax)(hidden)
model = tf.keras.Model(inputs=inputs, outputs=dist)
return model
def make_critic(n_features):
inputs = tf.keras.Input(shape=[n_features])
hidden = tf.keras.layers.Dense(20, activation=tf.nn.relu)(inputs)
value = tf.keras.layers.Dense(1)(hidden)
model = tf.keras.Model(inputs=inputs, outputs=value)
return model
actor = make_actor(N_FEATURES, N_ACTIONS)
critic = make_critic(N_FEATURES)
actor.summary()
critic.summary()
actor_optimizer = tf.keras.optimizers.Adam(LR_A)
critic_optimizer = tf.keras.optimizers.Adam(LR_C)
def loss_actor(s, a, td_error):
dist = actor(s.reshape(1, 4)).numpy()
log_prob = np.log(dist[0, a])
exp_v = np.mean(log_prob * td_error)
return tf.multiply(exp_v, -1)
def loss_critic(s, s_, r, gamma):
s, s_ = s[np.newaxis, :], s_[np.newaxis, :]
v = critic(s)
v_ = critic(s_)
td_error = r + gamma * v_ - v
return tf.multiply(td_error, 1)
def train(max_episodes):
for episode in range(max_episodes):
s = env.reset().astype(np.float32)
t = 0
track_r = []
while True:
dist = actor(s.reshape(1, 4)).numpy()
a = np.random.choice(range(N_ACTIONS), p=dist.ravel())
s_, r, done, info = env.step(a)
s_ = s_.astype(np.float32)
if done: r=-20
track_r.append(r)
with tf.GradientTape() as cri_tape, tf.GradientTape() as act_tape:
td_error = loss_critic(s, s_, r, GAMMA)
gradient = cri_tape.gradient(td_error, critic.trainable_variables)
critic_optimizer.apply_gradients(zip(gradient,critic.trainable_variables))
with tf.GradientTape() as act_tape:
neg_exp_v = loss_actor(s, a, td_error.numpy())
gradient = act_tape.gradient(neg_exp_v, critic.trainable_variables)
actor_optimizer.apply_gradients(zip(gradient, actor.trainable_variables))
s = s_
t += 1
if done:
print("Episode:{} Steps:{}".format(episode+1, t))
train(MAX_EPISODES)
The error is on line 69:actor_optimizer.apply_gradients(zip(gradient, actor.trainable_variables))
When I tried to print out the gradients for the actor the result was None.
I am really not getting where the problem is.

Why does the cost in my implementation of a deep neural network increase after a few iterations?

I am a beginner in machine learning and neural networks. Recently, after watching Andrew Ng's lectures on deep learning, I tried to implement a binary classifier using deep neural networks on my own.
However, the cost of the function is expected to decrease after each iteration.
In my program, it decreases slightly in the beginning, but rapidly increases later. I tried to make changes in learning rate and number of iterations, but to no avail. I am very confused.
Here is my code
1. Neural network classifier class
class NeuralNetwork:
def __init__(self, X, Y, dimensions, alpha=1.2, iter=3000):
self.X = X
self.Y = Y
self.dimensions = dimensions # Including input layer and output layer. Let example be dimensions=4
self.alpha = alpha # Learning rate
self.iter = iter # Number of iterations
self.length = len(self.dimensions)-1
self.params = {} # To store parameters W and b for each layer
self.cache = {} # To store cache Z and A for each layer
self.grads = {} # To store dA, dZ, dW, db
self.cost = 1 # Initial value does not matter
def initialize(self):
np.random.seed(3)
# If dimensions is 4, then layer 0 and 3 are input and output layers
# So we only need to initialize w1, w2 and w3
# There is no need of w0 for input layer
for l in range(1, len(self.dimensions)):
self.params['W'+str(l)] = np.random.randn(self.dimensions[l], self.dimensions[l-1])*0.01
self.params['b'+str(l)] = np.zeros((self.dimensions[l], 1))
def forward_propagation(self):
self.cache['A0'] = self.X
# For last layer, ie, the output layer 3, we need to activate using sigmoid
# For layer 1 and 2, we need to use relu
for l in range(1, len(self.dimensions)-1):
self.cache['Z'+str(l)] = np.dot(self.params['W'+str(l)], self.cache['A'+str(l-1)]) + self.params['b'+str(l)]
self.cache['A'+str(l)] = relu(self.cache['Z'+str(l)])
l = len(self.dimensions)-1
self.cache['Z'+str(l)] = np.dot(self.params['W'+str(l)], self.cache['A'+str(l-1)]) + self.params['b'+str(l)]
self.cache['A'+str(l)] = sigmoid(self.cache['Z'+str(l)])
def compute_cost(self):
m = self.Y.shape[1]
A = self.cache['A'+str(len(self.dimensions)-1)]
self.cost = -1/m*np.sum(np.multiply(self.Y, np.log(A)) + np.multiply(1-self.Y, np.log(1-A)))
self.cost = np.squeeze(self.cost)
def backward_propagation(self):
A = self.cache['A' + str(len(self.dimensions) - 1)]
m = self.X.shape[1]
self.grads['dA'+str(len(self.dimensions)-1)] = -(np.divide(self.Y, A) - np.divide(1-self.Y, 1-A))
# Sigmoid derivative for final layer
l = len(self.dimensions)-1
self.grads['dZ' + str(l)] = self.grads['dA' + str(l)] * sigmoid_prime(self.cache['Z' + str(l)])
self.grads['dW' + str(l)] = 1 / m * np.dot(self.grads['dZ' + str(l)], self.cache['A' + str(l - 1)].T)
self.grads['db' + str(l)] = 1 / m * np.sum(self.grads['dZ' + str(l)], axis=1, keepdims=True)
self.grads['dA' + str(l - 1)] = np.dot(self.params['W' + str(l)].T, self.grads['dZ' + str(l)])
# Relu derivative for previous layers
for l in range(len(self.dimensions)-2, 0, -1):
self.grads['dZ'+str(l)] = self.grads['dA'+str(l)] * relu_prime(self.cache['Z'+str(l)])
self.grads['dW'+str(l)] = 1/m*np.dot(self.grads['dZ'+str(l)], self.cache['A'+str(l-1)].T)
self.grads['db'+str(l)] = 1/m*np.sum(self.grads['dZ'+str(l)], axis=1, keepdims=True)
self.grads['dA'+str(l-1)] = np.dot(self.params['W'+str(l)].T, self.grads['dZ'+str(l)])
def update_parameters(self):
for l in range(1, len(self.dimensions)):
self.params['W'+str(l)] = self.params['W'+str(l)] - self.alpha*self.grads['dW'+str(l)]
self.params['b'+str(l)] = self.params['b'+str(l)] - self.alpha*self.grads['db'+str(l)]
def train(self):
np.random.seed(1)
self.initialize()
for i in range(self.iter):
#print(self.params)
self.forward_propagation()
self.compute_cost()
self.backward_propagation()
self.update_parameters()
if i % 100 == 0:
print('Cost after {} iterations is {}'.format(i, self.cost))
2. Testing code for odd or even number classifier
import numpy as np
from main import NeuralNetwork
X = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
Y = np.array([[1, 0, 1, 0, 1, 0, 1, 0, 1, 0]])
clf = NeuralNetwork(X, Y, [1, 1, 1], alpha=0.003, iter=7000)
clf.train()
3. Helper Code
import math
import numpy as np
def sigmoid_scalar(x):
return 1/(1+math.exp(-x))
def sigmoid_prime_scalar(x):
return sigmoid_scalar(x)*(1-sigmoid_scalar(x))
def relu_scalar(x):
if x > 0:
return x
else:
return 0
def relu_prime_scalar(x):
if x > 0:
return 1
else:
return 0
sigmoid = np.vectorize(sigmoid_scalar)
sigmoid_prime = np.vectorize(sigmoid_prime_scalar)
relu = np.vectorize(relu_scalar)
relu_prime = np.vectorize(relu_prime_scalar)
Output
I believe your cross-entropy derivative is wrong. Instead of this:
# WRONG!
self.grads['dA'+str(len(self.dimensions)-1)] = -(np.divide(self.Y, A) - np.divide(1-self.Y, A))
... do this:
# CORRECT
self.grads['dA'+str(len(self.dimensions)-1)] = np.divide(A - self.Y, (1 - A) * A)
See these lecture notes for the details. I think you meant the formula (5), but forgot 1-A. Anyway, use formula (6).

Tensorflow : train on mini batch, fast then slow

I am a beginner in tensorflow and I am trying to train a model using "mini batch". To do that I created a generator and iterate it. The problem I encounter is that, at the beginning of the epoch, the train seems fast (many batch per seconds) then the train slow down (1 batch per second) so I am wondering where I am wrong in my code but I do not find the problem.
def prepare_data(filename):
'''load file which give path and label for the data'''
f = open(filename, 'r')
data = [line.split() for line in f]
feat =[]
label=[]
for l in data:
feat.append(l[0])
label.append(l[1])
n_samples = len(feat)
shuf = list(range(n_samples))
random.shuffle(shuf)
count = Counter(label)
print(count)
feature = [feat[i] for i in shuf]
label = np.array(label, dtype=np.int)
return feature, label[shuf]
def get_specgrams(paths, nsamples=16000):
'''
Given list of paths, return specgrams.
'''
# read the wav files
wavs = [wavfile.read(x)[1] for x in paths]
# zero pad the shorter samples and cut off the long ones.
data = []
for wav in wavs:
if wav.size < 16000:
d = np.pad(wav, (nsamples - wav.size, 0), mode='constant')
else:
d = wav[0:nsamples]
data.append(d)
# get the specgram
#specgram = [signal.spectrogram(d, nperseg=256, noverlap=128)[2] for d in data]
#specgram = [s.reshape(129, 124, -1) for s in specgram]
return np.asarray(data)
def get_specgram(path, nsamples=16000):
'''
Given path, return specgrams.
'''
# read the wav files
wav = wavfile.read(path)[1]
# zero pad the shorter samples and cut off the long ones.
if wav.size < 16000:
d = np.pad(wav, (nsamples - wav.size, 0), mode='constant')
else:
d = wav[0:nsamples]
# get the specgram
#specgram = [signal.spectrogram(d, nperseg=256, noverlap=128)[2] for d in data]
#specgram = [s.reshape(129, 124, -1) for s in specgram]
return d
# multci classification binary labels
def one_hot_encode(labels, n_unique_labels=31):
n_labels = len(labels)
#print('number of unique labels:', n_unique_labels)
one_hot_encode = np.zeros((n_labels,n_unique_labels))
one_hot_encode[np.arange(n_labels), labels] = 1
return np.array(one_hot_encode, dtype=np.int)
#create_path_file('train/audio/')
def model(tr_features, tr_labels, ts_features, ts_labels):
# remove gpu device error
config = tf.ConfigProto(allow_soft_placement = True)
# parameters
BATCH_SIZE = 4
number_loop = math.ceil(len(tr_features)/BATCH_SIZE)
training_epochs = 10
n_dim = 16000
n_classes = 31 #len(np.unique(ts_labels))
n_hidden_units_one = 280
n_hidden_units_two = 300
sd = 1 / np.sqrt(n_dim)
learning_rate = 0.1
# get test data
ts_features, ts_labels = get_data(ts_features, ts_labels)
# Model
X = tf.placeholder(tf.float32,[None,n_dim])
Y = tf.placeholder(tf.float32,[None,n_classes])
W_1 = tf.Variable(tf.random_normal([n_dim,n_hidden_units_one], mean = 0, stddev=sd))
b_1 = tf.Variable(tf.random_normal([n_hidden_units_one], mean = 0, stddev=sd))
h_1 = tf.nn.tanh(tf.matmul(X,W_1) + b_1)
W_2 = tf.Variable(tf.random_normal([n_hidden_units_one,n_hidden_units_two], mean = 0, stddev=sd))
b_2 = tf.Variable(tf.random_normal([n_hidden_units_two], mean = 0, stddev=sd))
h_2 = tf.nn.sigmoid(tf.matmul(h_1,W_2) + b_2)
W = tf.Variable(tf.random_normal([n_hidden_units_two,n_classes], mean = 0, stddev=sd))
b = tf.Variable(tf.random_normal([n_classes], mean = 0, stddev=sd))
y_ = tf.nn.softmax(tf.matmul(h_2,W) + b)
init = tf.initialize_all_variables()
# function and optimizers
cost_function = -tf.reduce_sum(Y * tf.log(y_))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function)
correct_prediction = tf.equal(tf.argmax(y_,1), tf.argmax(Y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# train loop
cost_history = np.empty(shape=[1],dtype=float)
y_true, y_pred = None, None
with tf.Session(config = config) as sess:
sess.run(init)
for epoch in range(training_epochs):
print(' ## Epoch n°', epoch+1 )
batch = batch_generator(BATCH_SIZE, tr_features, tr_labels)
acc_total = 0.0
for cpt, (train_features_batch, train_labels_batch) in enumerate(batch):
_,cost = sess.run([optimizer,cost_function],feed_dict={X:train_features_batch,Y:train_labels_batch})
cost_history = np.append(cost_history,cost)
correct_prediction = tf.equal(tf.argmax(y_,1), tf.argmax(Y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
acc = accuracy.eval(feed_dict={X:train_features_batch,Y:train_labels_batch})
acc_total = (acc_total*cpt + acc)/(cpt+1)
print('Train accuracy : ', acc_total, '[',str(cpt+1), '/',str(number_loop), ']' ,flush=True, end='\r')
clear_output()
print('Train accuracy : ', acc_total)
y_pred = sess.run(tf.argmax(y_,1),feed_dict={X: ts_features})
y_true = sess.run(tf.argmax(ts_labels,1))
print('Test accuracy: ', round(sess.run(accuracy, feed_dict={X: ts_features, Y: ts_labels}) , 3))
fig = plt.figure(figsize=(10,8))
plt.plot(cost_history)
plt.axis([0,training_epochs,0,np.max(cost_history)])
plt.show()
p,r,f,s = precision_recall_fscore_support(y_true, y_pred, average='micro')
print("F-Score:", round(f,3))
def batch_generator(batch_size, feat_path, labels):
n_sample = len(feat_path)
ite = math.ceil(n_sample/batch_size)
for i in range(0, ite):
if i == ite-1:
label = one_hot_encode(labels[-batch_size:])
feat = get_specgrams(feat_path[-batch_size:])
yield (feat, label)
else:
label = one_hot_encode(labels[i*batch_size:i*batch_size+batch_size])
feat = get_specgrams(feat_path[i*batch_size:i*batch_size+batch_size])
yield (feat, label)
def get_data(feat_path, labels):
feat = get_specgrams(feat_path)
label = one_hot_encode(labels)
return feat, label
def __main__():
print('## Load data and shuffle')
feat_path, labels = prepare_data('data_labelised2.txt')
idx = int(len(labels)*0.8)
print("## Create Model")
model(feat_path[0:idx], labels[0:idx], feat_path[idx+1:], labels[idx+1:])
with tf.device('/gpu:0'):
__main__()