Custom Keras Metrics Class -> Metric at a certain recall value - tensorflow

I am trying to build a metric that is comparable to the metrics.PrecisionAtRecall class. Therefore, I've tried to build a custom metric by extending the keras.metrics.Metric class.
The original function is WSS = (TN + FN)/N − 1 + TP/(TP + FN), and this should be calculated at a certain recall value, say 95%.
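For example (hypothetical numbers, only to illustrate the formula): with N = 1000 studies, TN = 600, FN = 5 and TP = 95, the recall is 95/(95 + 5) = 0.95 and WSS = (600 + 5)/1000 − 1 + 0.95 = 0.555.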
What I have until now is the following:
import tensorflow as tf
from tensorflow.keras import backend

class WorkSavedOverSamplingAtRecall(tf.keras.metrics.Metric):
    def __init__(self, recall, name='wss_at_recall', **kwargs):
        super(WorkSavedOverSamplingAtRecall, self).__init__(name=name, **kwargs)
        self.wss = self.add_weight(name='wss', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_pred_pos = tf.cast(backend.round(backend.clip(y_pred, 0, 1)), tf.float32)
        y_pred_neg = 1 - y_pred_pos
        y_pos = tf.cast(backend.round(backend.clip(y_true, 0, 1)), tf.float32)
        y_neg = 1 - y_pos
        fn = backend.sum(y_pos * y_pred_neg)  # false negatives: actual positives predicted negative
        tn = backend.sum(y_neg * y_pred_neg)  # true negatives
        tp = backend.sum(y_pos * y_pred_pos)  # true positives
        n = tf.cast(tf.size(y_true), tf.float32)  # number of studies in batch
        r = tp / (tp + fn + backend.epsilon())  # recall
        self.wss.assign(((tn + fn) / n) - (1 - r))  # WSS = (TN + FN)/N - (1 - recall)

    def result(self):
        return self.wss

    def reset_states(self):
        # The state of the metric will be reset at the start of each epoch.
        self.wss.assign(0.)
How can I calculate the WSS at a certain recall? I've seen the following in tensorflow's own git repository:
def __init__(self, recall, num_thresholds=200, name=None, dtype=None):
    if recall < 0 or recall > 1:
        raise ValueError('`recall` must be in the range [0, 1].')
    self.recall = recall
    self.num_thresholds = num_thresholds
    super(PrecisionAtRecall, self).__init__(
        value=recall,
        num_thresholds=num_thresholds,
        name=name,
        dtype=dtype)
But that isn't really possible through the keras.metrics.Metric class.

If we follow the definition of the WSS#95 given by this paper: Reducing Workload in Systematic Review Preparation Using Automated Citation Classification, then we have:
"For the present work, we have fixed recall at 0.95 and therefore work saved over sampling at 95% recall (WSS#95%) is:"
WSS#95 = (TN + FN)/N − 0.05
and you could define your update function as:
class WorkSavedOverSamplingAtRecall(tf.keras.metrics.Metric):
    def __init__(self, recall, name='wss_at_recall', **kwargs):
        if recall < 0 or recall > 1:
            raise ValueError('`recall` must be in the range [0, 1].')
        self.recall = recall
        super(WorkSavedOverSamplingAtRecall, self).__init__(name=name, **kwargs)
        self.wss = self.add_weight(name='wss', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_pred_pos = tf.cast(backend.round(backend.clip(y_pred, 0, 1)), tf.float32)
        y_pred_neg = 1 - y_pred_pos
        y_pos = tf.cast(backend.round(backend.clip(y_true, 0, 1)), tf.float32)
        y_neg = 1 - y_pos
        fn = backend.sum(y_pos * y_pred_neg)  # false negatives
        tn = backend.sum(y_neg * y_pred_neg)  # true negatives
        n = tf.cast(tf.size(y_true), tf.float32)  # number of studies in batch
        # WSS at the fixed recall: (TN + FN)/N - (1 - recall)
        self.wss.assign(((tn + fn) / n) - (1 - self.recall))
One other solution would be to extend the TensorFlow class SensitivitySpecificityBase and to implement the WSS the way the PrecisionAtRecall class is implemented.
By using this class, here's how the WSS is calculated:
Compute the recall at all the thresholds (200 thresholds by default).
Find the index of the threshold where the recall is closest to the requested value (0.95 in this case).
Compute the WSS at that index.
The number of thresholds is used to match the given recall.
import tensorflow as tf
from tensorflow.python.keras.metrics import SensitivitySpecificityBase

class WorkSavedOverSamplingAtRecall(SensitivitySpecificityBase):
    def __init__(self, recall, num_thresholds=200, name="wss_at_recall", dtype=None):
        if recall < 0 or recall > 1:
            raise ValueError('`recall` must be in the range [0, 1].')
        self.recall = recall
        self.num_thresholds = num_thresholds
        super(WorkSavedOverSamplingAtRecall, self).__init__(
            value=recall, num_thresholds=num_thresholds, name=name, dtype=dtype
        )

    def result(self):
        recalls = tf.math.div_no_nan(
            self.true_positives, self.true_positives + self.false_negatives
        )
        n = self.true_negatives + self.true_positives + self.false_negatives + self.false_positives
        # WSS at each threshold: (TN + FN)/N - (1 - recall), per the definition above
        wss = tf.math.div_no_nan(
            self.true_negatives + self.false_negatives, n
        ) - (1.0 - recalls)
        return self._find_max_under_constraint(
            recalls, wss, tf.math.greater_equal
        )

    def get_config(self):
        """For serialization purposes"""
        config = {'num_thresholds': self.num_thresholds, 'recall': self.recall}
        base_config = super(WorkSavedOverSamplingAtRecall, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
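As a quick sanity check, the metric can be passed to model.compile like any built-in Keras metric. A minimal usage sketch (the model and the random data below are hypothetical placeholders, not taken from the question):

import numpy as np

# hypothetical binary classifier with a single sigmoid output
model = tf.keras.Sequential([
    tf.keras.layers.Dense(16, activation="relu", input_shape=(10,)),
    tf.keras.layers.Dense(1, activation="sigmoid"),
])
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=[WorkSavedOverSamplingAtRecall(recall=0.95)],  # WSS at 95% recall
)

# random placeholder data, only to show that the metric runs
x_train = np.random.rand(256, 10).astype("float32")
y_train = np.random.randint(0, 2, size=(256, 1)).astype("float32")
model.fit(x_train, y_train, epochs=2, batch_size=32)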

Related

Convert Tensorflow 1.x code with custom loss into 2.x

Suppose I have the following code written in TensorFlow 1.x, where I define a custom loss function. I wish to remove .compat.v1., Session, placeholder, etc., and convert it into TensorFlow 2.x.
How can I do so?
import DGM
import tensorflow as tf
import numpy as np
import scipy.stats as spstats
import matplotlib.pyplot as plt
from tqdm.notebook import trange
# Option parameters
phi = 10
n = 0.01
T = 4
# Solution parameters (domain on which to solve PDE)
t_low = 0.0 - 1e-10
x_low = 0.0 + 1e-10
x_high = 1.0
# neural network parameters
num_layers = 3
nodes_per_layer = 50
# Training parameters
sampling_stages = 2500 # number of times to resample new time-space domain points
steps_per_sample = 20 # number of SGD steps to take before re-sampling
# Sampling parameters
nsim_interior = 100
nsim_boundary_1 = 50
nsim_boundary_2 = 50
nsim_initial = 50
x_multiplier = 1.1 # multiplier for oversampling i.e. draw x from [x_low, x_high * x_multiplier]
def sampler(nsim_interior, nsim_boundary_1, nsim_boundary_2, nsim_initial):
    ''' Sample time-space points from the function's domain; points are sampled
    uniformly on the interior of the domain, at the initial/terminal time points
    and along the spatial boundary at different time points.
    Args:
        nsim_interior: number of space points in the interior of U
        nsim_boundary_1: number of space points in the boundary of U
        nsim_boundary_2: number of space points in the boundary of U_x
        nsim_initial: number of space points at the initial time
    '''
    # Sampler #1: domain interior
    t_interior = np.random.uniform(low=t_low, high=T, size=[nsim_interior, 1])
    x_interior = np.random.uniform(low=x_low, high=x_high*x_multiplier, size=[nsim_interior, 1])
    # Sampler #2: spatial boundary 1
    t_boundary_1 = np.random.uniform(low=t_low, high=T, size=[nsim_boundary_1, 1])
    x_boundary_1 = np.ones((nsim_boundary_1, 1))
    # Sampler #3: spatial boundary 2
    t_boundary_2 = np.random.uniform(low=t_low, high=T, size=[nsim_boundary_2, 1])
    x_boundary_2 = np.zeros((nsim_boundary_2, 1))
    # Sampler #4: initial condition
    t_initial = np.zeros((nsim_initial, 1))
    x_initial = np.random.uniform(low=x_low, high=x_high*x_multiplier, size=[nsim_initial, 1])
    return (
        t_interior, x_interior,
        t_boundary_1, x_boundary_1,
        t_boundary_2, x_boundary_2,
        t_initial, x_initial
    )
def loss(
    model,
    t_interior, x_interior,
    t_boundary_1, x_boundary_1,
    t_boundary_2, x_boundary_2,
    t_initial, x_initial
):
    ''' Compute total loss for training.
    Args:
        model: DGM model object
        t_interior, x_interior: sampled time / space points in the interior of U
        t_boundary_1, x_boundary_1: sampled time / space points in the boundary of U
        t_boundary_2, x_boundary_2: sampled time / space points in the boundary of U_x
        t_initial, x_initial: sampled time / space points at the initial time
    '''
    # Loss term #1: PDE
    # compute function value and derivatives at current sampled points
    u = model(t_interior, x_interior)
    u_t = tf.gradients(ys=u, xs=t_interior)[0]
    u_x = tf.gradients(ys=u, xs=x_interior)[0]
    u_xx = tf.gradients(ys=u_x, xs=x_interior)[0]
    diff_u = u_t - u_xx + phi**2 * (tf.nn.relu(u) + 1e-10)**n
    # compute average L2-norm for the PDE
    L1 = tf.reduce_mean(input_tensor=tf.square(diff_u))
    # Loss term #2: First b. c.
    u = model(t_boundary_1, x_boundary_1)
    bc1_error = u - 1
    # Loss term #3: Second b. c.
    u = model(t_boundary_2, x_boundary_2)
    u_x = tf.gradients(ys=u, xs=x_boundary_2)[0]
    bc2_error = u_x - 0
    # Loss term #4: Initial condition
    u = model(t_initial, x_initial)
    init_error = u - 1
    # compute average L2-norm for the initial/boundary conditions
    L2 = tf.reduce_mean(input_tensor=tf.square(bc1_error + bc2_error + init_error))
    return L1, L2
# initialize DGM model (last input: space dimension = 1)
model = DGM.DGMNet(nodes_per_layer, num_layers, 1)
# tensor placeholders (_tnsr suffix indicates tensors)
# inputs (time, space domain interior, space domain at initial time)
t_interior_tnsr = tf.compat.v1.placeholder(tf.float32, [None,1])
x_interior_tnsr = tf.compat.v1.placeholder(tf.float32, [None,1])
t_boundary_1_tnsr = tf.compat.v1.placeholder(tf.float32, [None,1])
x_boundary_1_tnsr = tf.compat.v1.placeholder(tf.float32, [None,1])
t_boundary_2_tnsr = tf.compat.v1.placeholder(tf.float32, [None,1])
x_boundary_2_tnsr = tf.compat.v1.placeholder(tf.float32, [None,1])
t_initial_tnsr = tf.compat.v1.placeholder(tf.float32, [None,1])
x_initial_tnsr = tf.compat.v1.placeholder(tf.float32, [None,1])
# loss
L1_tnsr, L2_tnsr = loss(
    model,
    t_interior_tnsr, x_interior_tnsr,
    t_boundary_1_tnsr, x_boundary_1_tnsr,
    t_boundary_2_tnsr, x_boundary_2_tnsr,
    t_initial_tnsr, x_initial_tnsr
)
loss_tnsr = L1_tnsr + L2_tnsr
# set optimizer
starting_learning_rate = 3e-4
global_step = tf.Variable(0, trainable=False)
lr = tf.compat.v1.train.exponential_decay(
    learning_rate=starting_learning_rate,
    global_step=global_step,
    decay_steps=1e5,
    decay_rate=0.96,
    staircase=True,
)
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=lr).minimize(loss_tnsr)
# initialize variables
init_op = tf.compat.v1.global_variables_initializer()
# open session
sess = tf.compat.v1.Session()
sess.run(init_op)
try:
    model.load_weights("checkpoint/")
    print("Loading from checkpoint.")
except:
    print("Checkpoint not found.")
# for each sampling stage
for i in trange(sampling_stages):
    # sample uniformly from the required regions
    t_interior, x_interior, \
    t_boundary_1, x_boundary_1, \
    t_boundary_2, x_boundary_2, \
    t_initial, x_initial = sampler(
        nsim_interior, nsim_boundary_1, nsim_boundary_2, nsim_initial
    )
    # for a given sample, take the required number of SGD steps
    for _ in range(steps_per_sample):
        loss, L1, L2, _ = sess.run(
            [loss_tnsr, L1_tnsr, L2_tnsr, optimizer],
            feed_dict = {
                t_interior_tnsr: t_interior,
                x_interior_tnsr: x_interior,
                t_boundary_1_tnsr: t_boundary_1,
                x_boundary_1_tnsr: x_boundary_1,
                t_boundary_2_tnsr: t_boundary_2,
                x_boundary_2_tnsr: x_boundary_2,
                t_initial_tnsr: t_initial,
                x_initial_tnsr: x_initial,
            }
        )
    if i % 10 == 0:
        print(f"Loss: {loss:.5f},\t L1: {L1:.5f},\t L2: {L2:.5f},\t iteration: {i}")
        model.save_weights("checkpoint/")
I tried searching for how to implement custom loss functions with the model as an argument, but couldn't implement it.
For model.compile there is a loss argument to which you can pass the loss function. It may be a string (the name of a built-in loss function) or a tf.keras.losses.Loss instance. For example:
Model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
              loss=tf.keras.losses.BinaryCrossentropy())
If you have created your own custom loss function, you can also pass it to the loss argument by passing the function itself. For example:
def my_loss_fn(y_true, y_pred):
    squared_difference = tf.square(y_true - y_pred)
    return tf.reduce_mean(squared_difference, axis=-1)

model.compile(optimizer='adam', loss=my_loss_fn)
Thank You.
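For the conversion itself (removing placeholders and Session), one common TF2 pattern is to wrap a single optimization step in a function and feed the sampled arrays directly as tensors. A rough sketch, assuming the model, loss and sampler defined in the question (tf.gradients is only valid in graph mode, so the step is wrapped in tf.function; alternatively the derivatives inside loss could be rewritten with nested tf.GradientTape):

import tensorflow as tf

optimizer = tf.keras.optimizers.Adam(learning_rate=3e-4)

@tf.function  # builds a graph, so the tf.gradients calls inside loss() remain valid
def train_step(t_interior, x_interior, t_boundary_1, x_boundary_1,
               t_boundary_2, x_boundary_2, t_initial, x_initial):
    with tf.GradientTape() as tape:
        L1, L2 = loss(model,
                      t_interior, x_interior,
                      t_boundary_1, x_boundary_1,
                      t_boundary_2, x_boundary_2,
                      t_initial, x_initial)
        total_loss = L1 + L2
    grads = tape.gradient(total_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return total_loss, L1, L2

# usage inside the sampling loop, instead of sess.run(...):
# total_loss, L1, L2 = train_step(*[tf.convert_to_tensor(a, tf.float32) for a in sampler(
#     nsim_interior, nsim_boundary_1, nsim_boundary_2, nsim_initial)])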

how to implement moving max (and min) calculation in the customized tf2.keras layer

During the training procedure, I want to calculate the moving maximum (and minimum) values of a batch of feature maps, and then I will implement a quantization algorithm based on the moving max (or min) values. For example: moving_max = (1 - momentum) x (previous moving_max) + momentum x (current max value of a batch).
I implemented the following code based on a customized tf2.keras layer:
import tensorflow as tf
from tensorflow.keras.layers import Layer

class QATQuantizerLayer(Layer):
    def __init__(self, num_bits, momentum=0.01, **kwargs):
        super(QATQuantizerLayer, self).__init__(**kwargs)
        self.num_bits = num_bits
        self.momentum = momentum
        self.num_flag = 0
        self.quant_min_val = 0
        self.quant_max_val = (1 << self.num_bits) - 1
        self.quant_range = float(self.quant_max_val - self.quant_min_val)

    def build(self, input_shape):
        self.moving_min = self.add_weight("moving_min", shape=(1,), initializer=tf.constant_initializer(-6), trainable=False)
        self.moving_max = self.add_weight("moving_max", shape=(1,), initializer=tf.constant_initializer(6), trainable=False)
        return super(QATQuantizerLayer, self).build(input_shape)

    def call(self, inputs, training, **kwargs):
        if training is None:
            training = False
        if training == True:
            batch_min = tf.reduce_min(inputs)
            batch_max = tf.reduce_max(inputs)
            if self.num_flag == 0:
                self.num_flag += 1
                self.moving_min = batch_min
                self.moving_max = batch_max
            else:
                temp_min = (1 - self.momentum) * self.moving_min + self.momentum * batch_min
                temp_max = (1 - self.momentum) * self.moving_max + self.momentum * batch_max
                self.moving_min = temp_min
                self.moving_max = temp_max
        float_range = self.moving_max - self.moving_min
        scale = float_range / self.quant_range
        scale = tf.maximum(scale, tf.keras.backend.epsilon())
        zero_point = tf.math.round(self.moving_min / scale)
        output = (tf.clip_by_value(_round_imp(inputs / scale) - zero_point,
                                   self.quant_min_val, self.quant_max_val) + zero_point) * scale
        return output
However, when I start to train I get the following error:
TypeError: An op outside of the function building code is being passed a "Graph" tensor. It is possible to have Graph tensors leak out of the function building context by including a tf.init_scope in your function building code. For example, the following function will fail:......
If I change the statement temp_min = (1 - self.momentum) * self.moving_min + self.momentum * batch_min to temp_min = (1 - self.momentum) + self.momentum * batch_min, the error disappears (that is, self.moving_min is removed from the statement).
How can I solve this problem?
Thank you very much.
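One likely cause (an assumption on my side, not verified against the full code base): inside call(), self.moving_min = temp_min rebinds the attribute from the tf.Variable created in build() to a graph tensor, which is exactly the kind of leak the error message describes. Updating the variables in place with assign() avoids it. A minimal sketch, using a hypothetical stripped-down layer that only tracks the statistics:

import tensorflow as tf
from tensorflow.keras.layers import Layer

class MovingMinMaxTracker(Layer):
    """Hypothetical minimal layer: only shows how the moving statistics
    could be updated in place with assign() instead of rebinding attributes."""
    def __init__(self, momentum=0.01, **kwargs):
        super().__init__(**kwargs)
        self.momentum = momentum

    def build(self, input_shape):
        self.moving_min = self.add_weight("moving_min", shape=(1,),
                                          initializer=tf.constant_initializer(-6),
                                          trainable=False)
        self.moving_max = self.add_weight("moving_max", shape=(1,),
                                          initializer=tf.constant_initializer(6),
                                          trainable=False)
        super().build(input_shape)

    def call(self, inputs, training=None):
        if training:
            batch_min = tf.reduce_min(inputs)
            batch_max = tf.reduce_max(inputs)
            # update the tf.Variables in place; "self.moving_min = ..." would
            # replace the variable with a graph tensor and can trigger the error
            self.moving_min.assign((1 - self.momentum) * self.moving_min + self.momentum * batch_min)
            self.moving_max.assign((1 - self.momentum) * self.moving_max + self.momentum * batch_max)
        return inputs

# quick check: layer = MovingMinMaxTracker(); layer(tf.random.normal([8, 4]), training=True)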

PPO: NaN Policy return in Tensorflow Keras

I am trying to implement the PPO algorithm with a clipped loss in addition to KL penalties, and run training on MuJoCo Gym environments. After roughly 15,000 gradient steps, the policy collapses and starts returning NaN.
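For reference, the standard clipped-surrogate objective with a KL penalty that such a setup targets (written out here only for clarity) is:
L(θ) = E_t[ min( r_t(θ) · A_t , clip(r_t(θ), 1 − ε, 1 + ε) · A_t ) ] − β · KL(π_old, π_θ), where r_t(θ) = π_θ(a_t | s_t) / π_old(a_t | s_t).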
These are the policy training info before the policy collapses:
A: tf.Tensor(-0.10426917, shape=(), dtype=float32)
LOG_A: tf.Tensor(37.021107, shape=(), dtype=float32)
LOSS: tf.Tensor(0.16812761, shape=(), dtype=float32)
GRAD: tf.Tensor(
[[-3.4624012e-04 -1.2807851e-04 -1.9778654e-01 ... -2.7586846e+00
-1.2552655e-01 -1.7212760e-03]
[ 4.6312678e-05 -2.2251482e-04 5.5088173e-03 ... 9.5249921e-02
2.2186586e-03 2.0080474e-04]
[ 2.0314787e-05 -1.6381161e-04 7.1509695e-03 ... 1.1740552e-01
3.4010289e-03 1.2105847e-04]
...
[ 1.7827883e-04 -1.1712313e-05 5.8873045e-01 ... 9.2354174e+00
2.9186043e-01 -2.2818900e-03]
[-9.0385452e-05 3.0951984e-03 -3.6487404e-02 ... -2.6829168e-01
-3.9602429e-02 2.0654879e-03]
[ 2.2925157e-04 4.6892464e-03 5.9946489e-01 ... 9.3497839e+00
3.0514282e-01 -1.3834883e-03]], shape=(11, 256), dtype=float32)
A: tf.Tensor(nan, shape=(), dtype=float32)
LOG_A: tf.Tensor(nan, shape=(), dtype=float32)
Note: The gradient info captures only the gradients of the first layer, as I have found capturing all gradient info to be messy and seemingly redundant.
What I have tried:
Tuning hyperparameters: I have tried multiple sets of hyperparameters, including the ones documented in the original paper. The same error occurs (the hyperparameter setup provided in the example below is chosen for higher sampling efficiency and faster debugging).
Gradient clipping: Gradient norm has been clipped to be unitary, and as shown above, it does not appear to have the exploding gradient issue.
Guaranteed numerical stability of tanh squashing of policy log probability: A small epsilon was used to clip the sum of squares so that action log probability does not return inf after tanh squashing.
Self-contained code example:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import gym
import scipy.signal
import time
from tensorflow.keras import Model
import matplotlib.pyplot as plt
import random
import tensorflow_probability as tfp
tf.keras.backend.set_floatx('float32')
EPSILON = 1e-10
################## GLOBAL SETUP P1 ##################
problem = "Hopper-v2"
env = gym.make(problem)
eval_env = gym.make(problem)
num_states = env.observation_space.shape[0]
print("Size of State Space -> {}".format(num_states), flush=True)
num_actions = env.action_space.shape[0]
print("Size of Action Space -> {}".format(num_actions), flush=True)
upper_bound = env.action_space.high[0]
lower_bound = env.action_space.low[0]
print("Max Value of Action -> {}".format(upper_bound), flush=True)
print("Min Value of Action -> {}".format(lower_bound), flush=True)
minibatch_size = 256
##########*****####################*****##########
#################### Auxiliaries ####################
def discounted_cumulative_sums(x, discount):
    # Discounted cumulative sums of vectors for computing rewards-to-go and advantage estimates
    return scipy.signal.lfilter([1], [1, float(-discount)], x[::-1], axis=0)[::-1]
##########*****####################*****##########
#################### Replay Buffer ####################
class Buffer:
    def __init__(self, observation_dimensions, action_dimensions, size, gamma=0.99, lam=0.95):
        self.observation_buffer = np.zeros(
            (size, observation_dimensions), dtype=np.float32
        )
        self.action_buffer = np.zeros((size, action_dimensions), dtype=np.int32)
        self.advantage_buffer = np.zeros(size, dtype=np.float32)
        self.reward_buffer = np.zeros(size, dtype=np.float32)
        self.return_buffer = np.zeros(size, dtype=np.float32)
        self.value_buffer = np.zeros(size, dtype=np.float32)
        self.logprobability_buffer = np.zeros(size, dtype=np.float32)
        self.gamma, self.lam = gamma, lam
        self.pointer, self.trajectory_start_index = 0, 0

    def store(self, observation, action, reward, value, logprobability):
        self.observation_buffer[self.pointer] = observation
        self.action_buffer[self.pointer] = action
        self.reward_buffer[self.pointer] = reward
        self.value_buffer[self.pointer] = value
        self.logprobability_buffer[self.pointer] = logprobability
        self.pointer += 1

    def finish_trajectory(self, last_value=0):
        path_slice = slice(self.trajectory_start_index, self.pointer)
        rewards = np.append(self.reward_buffer[path_slice], last_value)
        values = np.append(self.value_buffer[path_slice], last_value)
        deltas = rewards[:-1] + self.gamma * values[1:] - values[:-1]
        self.advantage_buffer[path_slice] = discounted_cumulative_sums(
            deltas, self.gamma * self.lam
        )
        self.return_buffer[path_slice] = discounted_cumulative_sums(
            rewards, self.gamma
        )[:-1]
        self.trajectory_start_index = self.pointer

    def get(self):
        # Get all data of the buffer and normalize the advantages
        rindex = np.random.choice(self.pointer, minibatch_size)
        advantage_mean, advantage_std = (
            np.mean(self.advantage_buffer[rindex]),
            np.std(self.advantage_buffer[rindex]),
        )
        return (
            self.observation_buffer[rindex],
            self.action_buffer[rindex],
            (self.advantage_buffer[rindex] - advantage_mean) / advantage_std,
            self.return_buffer[rindex],
            self.logprobability_buffer[rindex],
        )

    def clear(self):
        self.pointer, self.trajectory_start_index = 0, 0
##########*****####################*****##########
#################### Models ####################
class Actor(Model):
    def __init__(self):
        super().__init__()
        self.action_dim = num_actions
        self.dense1_layer = layers.Dense(256, activation="relu")
        self.dense2_layer = layers.Dense(256, activation="relu")
        self.mean_layer = layers.Dense(self.action_dim)
        self.stdev_layer = layers.Dense(self.action_dim)

    def call(self, state, eval_mode=False):
        a1 = self.dense1_layer(state)
        a2 = self.dense2_layer(a1)
        mu = self.mean_layer(a2)
        log_sigma = self.stdev_layer(a2)
        sigma = tf.exp(log_sigma)
        covar_m = tf.linalg.diag(sigma**2)
        dist = tfp.distributions.MultivariateNormalTriL(loc=mu, scale_tril=tf.linalg.cholesky(covar_m))
        if eval_mode:
            action_ = mu
        else:
            action_ = dist.sample()
        action = tf.tanh(action_)
        log_pi_ = dist.log_prob(action_)
        log_pi = log_pi_ - tf.reduce_sum(tf.math.log(tf.clip_by_value(1 - action**2, EPSILON, 1.0)), axis=1)
        return action*upper_bound, log_pi

def get_critic():
    state_input = layers.Input(shape=(num_states,))
    state_out = layers.Dense(256, activation="relu")(state_input)
    out = layers.Dense(256, activation="relu")(state_out)
    outputs = layers.Dense(1, dtype='float32')(out)
    model = tf.keras.Model(state_input, outputs)
    return model
##########*****####################*****##########
#################### GLOBAL SETUP P2 ####################
# Hyperparameters of the PPO algorithm
horizon = 2048
iterations = 2000
gamma = 0.99
clip_ratio = 0.2
epochs = 500
lam = 0.97
target_kl = 0.01
beta = 1.0
render = False
actor_model = Actor()
critic_model = get_critic()
lr = 0.0003
policy_optimizer = tf.keras.optimizers.Adam(learning_rate=lr,
# )
clipnorm=1.0)
value_optimizer = tf.keras.optimizers.Adam(learning_rate=lr,
# )
clipnorm=1.0)
buffer = Buffer(num_states, num_actions, horizon)
##########*****####################*****##########
#################### Training ####################
observation, episode_return, episode_length = env.reset(), 0, 0
tf_observation = tf.expand_dims(observation, 0)
def train_policy(
    observation_buffer, action_buffer, logprobability_buffer, advantage_buffer
):
    global beta
    with tf.GradientTape() as tape:  # Record operations for automatic differentiation.
        action, log_a = actor_model(observation_buffer)
        # print("A: ", tf.reduce_mean(action))
        # print("LOG_A: ", tf.reduce_mean(log_a))
        ratio = tf.exp(
            log_a
            - logprobability_buffer
        )
        # print("R: ", tf.reduce_mean(ratio), flush=True)
        cd_ratio = tf.clip_by_value(ratio, (1 - clip_ratio), (1 + clip_ratio))
        min_advantage = cd_ratio * advantage_buffer
        _kl = -beta*tf.math.reduce_max(logprobability_buffer - log_a)
        policy_loss = -tf.reduce_mean(tf.minimum(ratio * advantage_buffer, min_advantage) + _kl)
        # print("LOSS: ", policy_loss)
    policy_grads = tape.gradient(policy_loss, actor_model.trainable_variables)
    policy_optimizer.apply_gradients(zip(policy_grads, actor_model.trainable_variables))
    # print("GRAD: ", policy_grads[0], flush=True)
    action_opt, log_a_opt = actor_model(observation_buffer)
    kl = tf.reduce_mean(
        logprobability_buffer
        - log_a_opt
    )
    if kl < target_kl/1.5:
        beta = beta/2
    if kl > target_kl*1.5:
        beta = beta*2
    return kl
def train_value_function(observation_buffer, return_buffer):
    with tf.GradientTape() as tape:  # Record operations for automatic differentiation.
        value_loss = tf.reduce_mean((return_buffer - critic_model(observation_buffer)) ** 2)
    value_grads = tape.gradient(value_loss, critic_model.trainable_variables)
    value_optimizer.apply_gradients(zip(value_grads, critic_model.trainable_variables))
for ite in range(iterations):
    for t in range(horizon):
        if render:
            env.render()
        action, log_pi_a = actor_model(tf_observation)
        action = action[0]
        observation_new, reward, done, _ = env.step(action)
        episode_return += reward
        episode_length += 1
        value_t = critic_model(tf_observation)
        buffer.store(observation, action, reward, value_t, log_pi_a)
        observation = observation_new
        tf_observation = tf.expand_dims(observation, 0)
        terminal = done
        if terminal or (t == horizon - 1):
            last_value = 0 if done else critic_model(tf_observation)
            buffer.finish_trajectory(last_value)
            observation, episode_return, episode_length = env.reset(), 0, 0
            tf_observation = tf.expand_dims(observation, 0)
    for _ in range(epochs):
        (
            observation_buffer,
            action_buffer,
            advantage_buffer,
            return_buffer,
            logprobability_buffer,
        ) = buffer.get()
        kl = train_policy(
            observation_buffer, action_buffer, logprobability_buffer, advantage_buffer
        )
        train_value_function(observation_buffer, return_buffer)
    buffer.clear()
##########*****####################*****##########
Note:
The code base combines a modified version of the official Keras PPO tutorial (https://keras.io/examples/rl/ppo_cartpole/) with modules (mainly the policy network) that have been tested in other implementations.
I refrained from using the tf.function decorator, as I am very new to TensorFlow and do not fully understand its impact, and I have read in various GitHub issues that it can sometimes cause numerical instability due to caching. However, it could be a source of my issues.
Any help is appreciated, and apologies if something is missing or unclear.

How to convert a pytorch script into tensorflow?

I am trying to convert a PyTorch script into TensorFlow. How may I do so? Do we do it line by line, or does the overall structure change in TensorFlow?
Please, someone help me with this and provide some useful links!
The code refers to a graph convolution network. I see that pytorch_geometric has predefined modules like MessagePassing, from which GCNConv inherits.
Is there any similar module in tensorflow?
GCN script:
import torch
from torch.nn import Parameter
from torch_scatter import scatter_add
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import remove_self_loops, add_self_loops
from inits import glorot, zeros
import pdb

class GCNConv(MessagePassing):
    def __init__(self,
                 in_channels,
                 out_channels,
                 improved=False,
                 cached=False,
                 bias=True):
        super(GCNConv, self).__init__('add')
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.improved = improved
        self.cached = cached
        self.cached_result = None
        self.weight = Parameter(torch.Tensor(in_channels, out_channels))
        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        glorot(self.weight)
        zeros(self.bias)
        self.cached_result = None

    @staticmethod
    def norm(edge_index, num_nodes, edge_weight, improved=False, dtype=None):
        if edge_weight is None:
            edge_weight = torch.ones((edge_index.size(1), ),
                                     dtype=dtype,
                                     device=edge_index.device)
        edge_weight = edge_weight.view(-1)
        assert edge_weight.size(0) == edge_index.size(1)
        edge_index, edge_weight = remove_self_loops(edge_index, edge_weight)
        edge_index, _ = add_self_loops(edge_index, num_nodes=num_nodes)
        loop_weight = torch.full((num_nodes, ),
                                 1 if not improved else 2,
                                 dtype=edge_weight.dtype,
                                 device=edge_weight.device)
        edge_weight = torch.cat([edge_weight, loop_weight], dim=0)
        row, col = edge_index
        deg = scatter_add(edge_weight, col, dim=0, dim_size=num_nodes)
        deg_inv_sqrt = deg.pow(-1)
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
        return edge_index, deg_inv_sqrt[col] * edge_weight

    def forward(self, x, edge_index, edge_weight=None):
        """"""
        x = torch.matmul(x, self.weight)
        if not self.cached or self.cached_result is None:
            edge_index, norm = self.norm(edge_index, x.size(0), edge_weight,
                                         self.improved, x.dtype)
            self.cached_result = edge_index, norm
        edge_index, norm = self.cached_result
        return self.propagate(edge_index, x=x, norm=norm)

    def message(self, x_j, norm):
        return norm.view(-1, 1) * x_j

    def update(self, aggr_out):
        if self.bias is not None:
            aggr_out = aggr_out + self.bias
        return aggr_out

    def __repr__(self):
        return '{}({}, {})'.format(self.__class__.__name__, self.in_channels,
                                   self.out_channels)
The script implements a graph convolutional network. (source: https://github.com/seongjunyun/Graph_Transformer_Networks )
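Core TensorFlow/Keras has no direct MessagePassing equivalent (separate libraries such as Spektral or TensorFlow GNN provide graph layers), but the propagation step itself can be written with sparse ops. A rough sketch of the core GCN propagation out = A_norm · (X · W), using hypothetical toy data rather than the original script:

import tensorflow as tf

num_nodes, in_channels, out_channels = 4, 3, 2

x = tf.random.normal([num_nodes, in_channels])                        # node features
weight = tf.Variable(tf.random.normal([in_channels, out_channels]))   # plays the role of self.weight

# toy adjacency with self-loops; in the original script the edge weights
# would be the degree-normalized values computed by norm()
edge_index = tf.constant([[0, 1], [1, 2], [2, 3],
                          [0, 0], [1, 1], [2, 2], [3, 3]], dtype=tf.int64)
edge_weight = tf.ones([edge_index.shape[0]], dtype=tf.float32)
adj = tf.sparse.reorder(tf.sparse.SparseTensor(edge_index, edge_weight,
                                               dense_shape=[num_nodes, num_nodes]))

h = tf.matmul(x, weight)                         # X · W
out = tf.sparse.sparse_dense_matmul(adj, h)      # aggregation over neighbours
print(out.shape)                                 # (4, 2)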

Adam custom implementation by PyTorch

I'm trying to code my own implementation of the Adam optimization algorithm, but when I try to find the optimum of the function f(x, y) = x*x + y*y, the method generates unexpected output.
Here is the code, along with a graph of each point on Adam's path compared with the path of the simpler SGD algorithm.
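For reference, the bias-corrected Adam update such an implementation aims for (standard form; EMA1 and EMA2 in the code play the roles of m_t and v_t) is:
m_t = β1 · m_{t−1} + (1 − β1) · g_t
v_t = β2 · v_{t−1} + (1 − β2) · g_t²
m̂_t = m_t / (1 − β1^t),  v̂_t = v_t / (1 − β2^t)
θ_t = θ_{t−1} − lr · m̂_t / (sqrt(v̂_t) + ε)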
import torch

class optimizer:
    def __init__(self, params):
        self.parameters = list(params)

    def zero_grad(self):
        for param in self.parameters:  # Have to be an iter object.
            try:
                param.grad.zero_()
            except:
                pass

    def step(self):
        pass

class Adam(optimizer):
    def __init__(self, params, lr, beta1=0.9, beta2=0.999):
        self.parameters = list(params)
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.EMA1 = [torch.zeros_like(param) for param in self.parameters]
        self.EMA2 = [torch.zeros_like(param) for param in self.parameters]
        self.iter_num = 0
        self.eps = 1e-9

    def step(self):
        self.iter_num += 1
        correct1 = 1 - self.beta1**self.iter_num  # EMA1 bias correction.
        correct2 = 1 - self.beta2**self.iter_num  # EMA2 bias correction.
        with torch.no_grad():
            for param, EMA1, EMA2 in zip(self.parameters, self.EMA1, self.EMA2):
                EMA1.set_((1 - self.beta1) * param.grad + self.beta1 * EMA1)
                EMA2.set_((1 - self.beta2) * (param.grad**2) + self.beta2 * EMA2)
                numerator = EMA1 / correct1
                denominator = (EMA2 / correct2).sqrt() + self.eps
                param -= self.lr * numerator / denominator