How to convert a pytorch script into tensorflow? - tensorflow

I am trying to convert a pytorch script into tensorflow, how may I do so? Do we do it line by line or does the overall structure change in tensorflow?
Could someone please help me with this and point me to some useful links?
The code refers to graph convolution network. I see that pytorch_geometric has predefined modules like MessagePassing from which GCNConv is inheriting.
Is there any similar module in tensorflow?
GCN script :
import torch
from torch.nn import Parameter
from torch_scatter import scatter_add
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import remove_self_loops, add_self_loops
from inits import glorot, zeros
import pdb
class GCNConv(MessagePassing):
    """Graph convolution layer using 'add' aggregation from ``MessagePassing``.

    Node features are multiplied by a learned weight matrix, then propagated
    along the (self-loop augmented) edges, each message being scaled by a
    per-edge normalisation coefficient. An optional bias is added afterwards.

    Args:
        in_channels: Size of each input node feature vector.
        out_channels: Size of each output node feature vector.
        improved: If True, self-loops get weight 2 instead of 1.
        cached: If True, the normalised edges computed on the first call are
            reused on later calls.
        bias: If False, no bias parameter is created.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 improved=False,
                 cached=False,
                 bias=True):
        super(GCNConv, self).__init__('add')

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.improved = improved
        self.cached = cached
        self.cached_result = None

        self.weight = Parameter(torch.Tensor(in_channels, out_channels))

        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise the parameters and drop any cached normalisation."""
        glorot(self.weight)
        zeros(self.bias)
        self.cached_result = None

    # The decorator is required: `forward` calls `self.norm(edge_index, ...)`
    # without expecting `self` to be bound as the first argument.
    @staticmethod
    def norm(edge_index, num_nodes, edge_weight, improved=False, dtype=None):
        """Add self-loops and compute one normalisation weight per edge.

        Returns:
            ``(edge_index, norm)`` where ``edge_index`` has existing
            self-loops removed and fresh ones appended, and ``norm`` is each
            edge weight divided by the summed weight arriving at its target
            node (``col``).
        """
        if edge_weight is None:
            edge_weight = torch.ones((edge_index.size(1), ),
                                     dtype=dtype,
                                     device=edge_index.device)
        edge_weight = edge_weight.view(-1)
        assert edge_weight.size(0) == edge_index.size(1)

        edge_index, edge_weight = remove_self_loops(edge_index, edge_weight)
        edge_index, _ = add_self_loops(edge_index, num_nodes=num_nodes)
        loop_weight = torch.full((num_nodes, ),
                                 1 if not improved else 2,
                                 dtype=edge_weight.dtype,
                                 device=edge_weight.device)
        edge_weight = torch.cat([edge_weight, loop_weight], dim=0)

        row, col = edge_index
        deg = scatter_add(edge_weight, col, dim=0, dim_size=num_nodes)
        # Exponent is -1 (plain inverse degree), not -0.5.
        deg_inv = deg.pow(-1)
        # Nodes with zero degree would give 1/0 = inf; treat them as 0.
        deg_inv[deg_inv == float('inf')] = 0

        return edge_index, deg_inv[col] * edge_weight

    def forward(self, x, edge_index, edge_weight=None):
        """Transform ``x`` with the weight matrix and propagate along edges."""
        x = torch.matmul(x, self.weight)

        if not self.cached or self.cached_result is None:
            edge_index, norm = self.norm(edge_index, x.size(0), edge_weight,
                                         self.improved, x.dtype)
            self.cached_result = edge_index, norm

        edge_index, norm = self.cached_result
        return self.propagate(edge_index, x=x, norm=norm)

    def message(self, x_j, norm):
        """Scale each incoming neighbour feature by its edge coefficient."""
        return norm.view(-1, 1) * x_j

    def update(self, aggr_out):
        """Add the bias (if any) to the aggregated messages."""
        if self.bias is not None:
            aggr_out = aggr_out + self.bias
        return aggr_out

    def __repr__(self):
        return '{}({}, {})'.format(self.__class__.__name__, self.in_channels,
                                   self.out_channels)
The script is of a graph convolutional network. (source: https://github.com/seongjunyun/Graph_Transformer_Networks )

Related

How to concat laserembeddings with huggingface funnel transformers simple CLS output for NLP sequence classification task?

i was approaching NLP sequence classification problem (3 classes) using huggingface transformers (funnel-transformer/large) and tensorflow.
first i created laserembedding like this :
# Embed every row of the "text" column with LASER and store the result on disk.
from laserembeddings import Laser
laser = Laser()
df = pd.read_csv("mycsv.csv")
# All sentences are embedded with lang='en'.
embeds = laser.embed_sentences(df['text'].values, lang='en')
# NOTE(review): write_pickle_to_file is not defined in this snippet —
# presumably a user helper that pickles `embeds` to the given path.
write_pickle_to_file('train.pkl', embeds )
part 1 : Tensorflow version
for data preparation i use code like below :
# Build one input string per row: the three text columns joined by the
# tokenizer's separator token.
df['text'] = (temp['column1'] + tokenizer.sep_token
              + temp['column2'] + tokenizer.sep_token
              + temp['column3'])


def encode_text(texts):
    """Tokenize ``texts`` and return the three model inputs as int64 arrays.

    Every sequence is padded or truncated to ``cfg.max_len``.

    Args:
        texts: List of strings to tokenize.

    Returns:
        ``[input_ids, attention_mask, token_type_ids]``, each a NumPy array
        of dtype int64.
    """
    enc_di = tokenizer.batch_encode_plus(
        texts,
        # `padding='max_length'` already pads to `max_length`; the deprecated
        # `pad_to_max_length=True` flag was redundant and has been dropped.
        padding='max_length',
        truncation=True,
        return_token_type_ids=True,
        max_length=cfg.max_len,
    )
    return [np.asarray(enc_di['input_ids'], dtype=np.int64),
            np.asarray(enc_di['attention_mask'], dtype=np.int64),
            np.asarray(enc_di['token_type_ids'], dtype=np.int64)]
then inside training function :
# Tokenize the training texts; x_train is [input_ids, attention_mask, token_type_ids].
x_train = encode_text(df.text.to_list())
# Build the training pipeline: (feature dict, one-hot label) pairs, repeated,
# shuffled with a 2048-element buffer, batched and prefetched.
train_ds = (
tf.data.Dataset
.from_tensor_slices((
{
# Keys must match the `name=` of the Keras Input layers they feed.
"input_ids": x_train[0],
"input_masks": x_train[1],
"input_segments": x_train[2],
# NOTE(review): reads from `train`, while the labels below come from `df` —
# confirm both frames have the same rows in the same order.
"lasers": np.array( train[laser_columns].values, dtype=np.float32 ) #laser_columns contains all the laser embedded columns
},
tf.one_hot(df["label"].to_list(), 3) #3 class
))
.repeat()
.shuffle(2048)
.batch(cfg.batch_size)
.prefetch(AUTO)
)
i add laser embedding in my model like this :
def create_model():
    """Build and compile the transformer + LASER classifier (3 classes).

    The first-token ([CLS]) vector of the transformer output is concatenated
    with a tanh-projected LASER embedding, passed through one hidden dense
    layer and a softmax output layer.

    Returns:
        A compiled ``tf.keras.Model`` with inputs ``input_ids``,
        ``input_masks``, ``input_segments`` and ``lasers``.
    """
    transformer = transformers.TFAutoModel.from_pretrained(
        cfg.pretrained, config=config, from_pt=True)
    # NOTE(review): must equal the length the tokenizer pads to
    # (`cfg.max_len` in `encode_text`) — confirm they agree.
    max_len = 512

    # transformer
    input_ids = Input(shape=(max_len,), dtype="int32", name="input_ids")
    input_masks = Input(shape=(max_len,), dtype="int32", name="input_masks")
    input_segments = Input(shape=(max_len,), dtype="int32", name="input_segments")
    sequence_output = transformer(
        input_ids, attention_mask=input_masks, token_type_ids=input_segments)[0]
    # Keep only the first position of the sequence output.
    cls_token = sequence_output[:, 0, :]

    # lasers
    lasers = Input(shape=(n_lasers,), dtype=tf.float32, name="lasers")  # n_lasers = 1024
    lasers_output = tf.keras.layers.Dense(n_lasers, activation='tanh')(lasers)

    x = tf.keras.layers.Concatenate()([cls_token, lasers_output])
    x = tf.keras.layers.Dropout(0.1)(x)
    x = tf.keras.layers.Dense(2048, activation='tanh')(x)
    x = tf.keras.layers.Dropout(0.1)(x)
    out = tf.keras.layers.Dense(3, activation='softmax')(x)

    model = Model(inputs=[input_ids, input_masks, input_segments, lasers], outputs=out)
    # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
    model.compile(Adam(learning_rate=1e-5),
                  loss=losses.CategoricalCrossentropy(),
                  metrics=["acc", metrics.CategoricalCrossentropy(name='xentropy')])
    return model
now my question is, how do we do the same with pytorch for exact same problem and same dataset?
part 2 : pytorch version
df = pd.read_csv("mytrain.csv")
class myDataset(Dataset):
    """Dataset that tokenizes one text pair per data-frame row.

    The first segment is ``column1 + ' ' + column2 + ' ' + column3`` and the
    second segment is ``column4``.

    Args:
        df: Data frame with ``column1`` .. ``column4`` and, when
            ``training`` is True, a ``label`` column.
        max_length: Maximum sequence length passed to the tokenizer.
        tokenizer: Object providing ``encode_plus``.
        training: If True, each sample also carries its ``target``.
    """

    def __init__(self, df, max_length, tokenizer, training=True):
        self.df = df
        self.max_len = max_length
        self.tokenizer = tokenizer
        self.column1 = self.df['column1'].values
        self.column2 = self.df['column2'].values
        self.column3 = self.df['column3'].values
        self.column4 = self.df['column4'].values
        self.training = training
        if self.training:
            self.targets = self.df['label'].values

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        """Return the (unpadded) tokenizer output for row ``index`` as a dict."""
        column1 = self.column1[index]
        column2 = self.column2[index]
        column3 = self.column3[index]
        text0 = self.column4[index]
        text1 = column1 + ' ' + column2 + ' ' + column3

        inputs = self.tokenizer.encode_plus(
            text1,
            text0,
            truncation=True,
            add_special_tokens=True,
            return_token_type_ids=True,
            is_split_into_words=False,
            max_length=self.max_len,
        )

        samples = {
            'input_ids': inputs['input_ids'],
            'attention_mask': inputs['attention_mask'],
        }
        if 'token_type_ids' in inputs:
            samples['token_type_ids'] = inputs['token_type_ids']
        if self.training:
            samples['target'] = self.targets[index]
        return samples
collate_fn = DataCollatorWithPadding(tokenizer=CONFIG['tokenizer'])
class myModel(nn.Module):
    """Pretrained transformer backbone followed by a 3-way linear head.

    Args:
        model_name: Name or path understood by ``AutoModel.from_pretrained``.
    """

    def __init__(self, model_name):
        super(myModel, self).__init__()
        self.model = AutoModel.from_pretrained(model_name)
        # Gradient checkpointing is always enabled.
        print("using gradient_checkpoint...")
        self.model.gradient_checkpointing_enable()
        # NOTE(review): this config is loaded and modified *after* the
        # backbone has been built and is never passed to it, so the overrides
        # below only affect `self.config` (used here for `hidden_size`), not
        # `self.model` — confirm whether that is intended.
        self.config = AutoConfig.from_pretrained(model_name)
        self.config.update(
            {
                "output_hidden_states": True,
                "hidden_dropout_prob": 0.0,
                "layer_norm_eps": 1e-7,
                "add_pooling_layer": False,
                "attention_probs_dropout_prob": 0.0,
            }
        )
        self.fc = nn.Linear(self.config.hidden_size, 3)

    def forward(self, ids, mask):
        """Return the 3-class logits computed from the first-token vector."""
        out = self.model(input_ids=ids, attention_mask=mask, output_hidden_states=False)
        # out[0] is the sequence output; keep position 0 of every sequence.
        out = out[0][:, 0, :]
        outputs = self.fc(out)
        return outputs
and in train and validation loop i have code like this :
# One pass over the dataloader with a progress bar.
bar = tqdm(enumerate(dataloader), total=len(dataloader))
for step, data in bar:
    # Move the batch to the target device as int64 tensors.
    ids = data['input_ids'].to(device, dtype=torch.long)
    mask = data['attention_mask'].to(device, dtype=torch.long)
    targets = data['target'].to(device, dtype=torch.long)
    batch_size = ids.size(0)
    optimizer.zero_grad()
    # forward pass with `autocast` context manager
    with autocast(enabled=True):
        outputs = model(ids, mask)
        loss = loss_fct(outputs, targets)
i would like to know where and how in my huggingface pytorch pipeline i can use the laserembedding that i created earlier and used in tensorflow huggingface model?
i would like to concat laserembeddings with funnel transformer's simple CLS token output and train the transformers model with laser embed as extra feature in pytorch implementation exactly like i did in tensorflow example,do you know how to modify my pytorch code to make it working in pytorch? the tensorflow implementation with laserembedding concatenated above that i have posted here works good,i just wanted to do the same in pytorch implementation,,your help is highly appreciated,thanks in advance

Custom Keras Metrics Class -> Metric at a certain recall value

I am trying to build a metric that is comparable to the metrics.PrecisionAtRecall class. Therefore, I've tried to build a custom metric by extending the keras.metrics.Metric class.
The original function is WSS = (TN + FN)/N − 1 + TP/(TP + FN) and this should be calculated at a certain recall value, for say 95%.
What I have until now is the following:
class WorkSavedOverSamplingAtRecall(tf.keras.metrics.Metric):
    """Work saved over sampling, WSS = (TN + FN)/N - 1 + TP/(TP + FN).

    The value is recomputed from each batch and overwrites the previous one.

    Args:
        recall: Target recall. Accepted but not yet used by this version.
        name: Metric name.
    """

    def __init__(self, recall, name='wss_at_recall', **kwargs):
        super(WorkSavedOverSamplingAtRecall, self).__init__(name=name, **kwargs)
        self.wss = self.add_weight(name='wss', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Compute WSS for the current batch from labels/predictions rounded to 0/1."""
        y_pred_pos = tf.cast(backend.round(backend.clip(y_pred, 0, 1)), tf.float32)
        y_pred_neg = 1 - y_pred_pos
        y_pos = tf.cast(backend.round(backend.clip(y_true, 0, 1)), tf.float32)
        y_neg = 1 - y_pos
        # False negatives are positives predicted negative (the previous
        # expression `y_neg * y_pred_pos` counted false positives).
        fn = backend.sum(y_pos * y_pred_neg)
        tn = backend.sum(y_neg * y_pred_neg)
        tp = backend.sum(y_pos * y_pred_pos)
        # Number of studies in the batch; `len()` does not work on tensors
        # with an unknown batch dimension.
        n = tf.cast(tf.shape(y_true)[0], tf.float32)
        r = tp / (tp + fn + backend.epsilon())  # recall
        # WSS = (TN + FN)/N - 1 + recall
        self.wss.assign((tn + fn) / n - 1 + r)

    def result(self):
        return self.wss

    def reset_states(self):
        # The state of the metric will be reset at the start of each epoch.
        self.wss.assign(0.)
How can I calculate the WSS at a certain recall? I've seen the following in tensorflow's own git repository:
def __init__(self, recall, num_thresholds=200, name=None, dtype=None):
    """Validate ``recall`` and forward it to the base class as ``value``.

    Raises:
        ValueError: If ``recall`` is outside ``[0, 1]``.
    """
    if recall < 0 or recall > 1:
        raise ValueError('`recall` must be in the range [0, 1].')
    self.recall = recall
    self.num_thresholds = num_thresholds
    super(PrecisionAtRecall, self).__init__(
        value=recall,
        num_thresholds=num_thresholds,
        name=name,
        dtype=dtype)
But that isn't really possible through the keras.metrics.Metric class.
If we follow the definition of the WSS#95 given by this paper :Reducing Workload in Systematic Review Preparation Using Automated Citation Classification, then we have
For the present work, we have fixed recall at 0.95 and therefore work saved over sampling at 95% recall (WSS#95%) is: $\mathrm{WSS@95\%} = \frac{TN + FN}{N} - (1 - 0.95) = \frac{TN + FN}{N} - 0.05$
And you could define your update function by :
class WorkSavedOverSamplingAtRecall(tf.keras.metrics.Metric):
    """Work saved over sampling with recall fixed: (TN + FN)/N - (1 - recall).

    The value is recomputed from each batch and overwrites the previous one.

    Args:
        recall: Fixed recall in ``[0, 1]`` (e.g. 0.95).
        name: Metric name.

    Raises:
        ValueError: If ``recall`` is outside ``[0, 1]``.
    """

    def __init__(self, recall, name='wss_at_recall', **kwargs):
        if recall < 0 or recall > 1:
            raise ValueError('`recall` must be in the range [0, 1].')
        self.recall = recall
        super(WorkSavedOverSamplingAtRecall, self).__init__(name=name, **kwargs)
        self.wss = self.add_weight(name='wss', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Compute the metric for the current batch from values rounded to 0/1."""
        y_pred_pos = tf.cast(backend.round(backend.clip(y_pred, 0, 1)), tf.float32)
        y_pred_neg = 1 - y_pred_pos
        # `y_pos` was used without being defined.
        y_pos = tf.cast(backend.round(backend.clip(y_true, 0, 1)), tf.float32)
        y_neg = 1 - y_pos
        # False negatives are positives predicted negative.
        fn = backend.sum(y_pos * y_pred_neg)
        tn = backend.sum(y_neg * y_pred_neg)
        # Number of studies in the batch.
        n = tf.cast(tf.shape(y_true)[0], tf.float32)
        self.wss.assign(((tn + fn) / n) - (1 - self.recall))

    def result(self):
        return self.wss

    def reset_states(self):
        # The state of the metric will be reset at the start of each epoch.
        self.wss.assign(0.)
Another solution would be to extend the TensorFlow class SensitivitySpecificityBase and to implement the WSS the same way the PrecisionAtRecall class is implemented.
By using this class, here's how the WSS is calculated :
Compute the recall at all the thresholds (200 thresholds by default).
Find the index of the threshold where the recall is closest to the requested value. (0.95 in that case).
Compute the WSS at that index.
The number of thresholds is used to match the given recall.
import tensorflow as tf
from tensorflow.python.keras.metrics import SensitivitySpecificityBase
class WorkSavedOverSamplingAtRecall(SensitivitySpecificityBase):
    """Best work saved over sampling among thresholds reaching a given recall.

    WSS is evaluated per threshold as ``(TN + FN)/N - 1 + TP/(TP + FN)``.

    Args:
        recall: Minimum recall in ``[0, 1]``.
        num_thresholds: Number of thresholds passed to the base class.
        name: Metric name.
        dtype: Metric dtype.

    Raises:
        ValueError: If ``recall`` is outside ``[0, 1]``.
    """

    def __init__(self, recall, num_thresholds=200, name="wss_at_recall", dtype=None):
        if recall < 0 or recall > 1:
            raise ValueError('`recall` must be in the range [0, 1].')
        self.recall = recall
        self.num_thresholds = num_thresholds
        super(WorkSavedOverSamplingAtRecall, self).__init__(
            value=recall, num_thresholds=num_thresholds, name=name, dtype=dtype
        )

    def result(self):
        """Return the largest WSS among thresholds whose recall >= ``recall``."""
        recalls = tf.math.div_no_nan(
            self.true_positives, self.true_positives + self.false_negatives
        )
        n = (self.true_negatives + self.true_positives
             + self.false_negatives + self.false_positives)
        # WSS = (TN + FN)/N - 1 + recall; the "- 1 + recall" term was missing.
        wss = tf.math.div_no_nan(
            self.true_negatives + self.false_negatives, n
        ) - 1.0 + recalls
        return self._find_max_under_constraint(
            recalls, wss, tf.math.greater_equal
        )

    def get_config(self):
        """For serialization purposes"""
        config = {'num_thresholds': self.num_thresholds, 'recall': self.recall}
        base_config = super(WorkSavedOverSamplingAtRecall, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In tensorflow, for custom layers that need arguments at instantiation, does the get_config method need overriding?

Ubuntu - 20.04,
Tensorflow - 2.2.0,
Tensorboard - 2.2.1
I have read that one needs to reimplement the config method in order for a custom layer to be serializable.
I have a custom layer that accepts arguments in its __init__. It uses another custom layer and that consumes arguments in its __init__ as well. I can:
Without Tensorboard callbacks:
Use them in a model both in eager model and graph form
Run tf.saved_model.save and it executes without a glitch
Load the thus saved model using tf.saved_model.load and it loads the model saved in 2. above
I can call model(input) on the loaded model. I can also call `call_and_return_all_conditional_losses(input)` and both run correctly as well
With Tensorboard callbacks:
All of the above (can .fit, save, load, predict from loaded etc) except.. While running fit i get
WARNING:tensorflow:Model failed to serialize as JSON. Ignoring... Layer PREPROCESS_MONSOON has arguments in `__init__` and therefore must override `get_config`.
Pasting the entire code here that can be run end to end. You just need to have tensorflow 2 installed. Please delete/add the callbacks (only tensorboard callbacks is there) to .fit to see the two behaviors mentioned above
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers as l
from tensorflow import keras as k
import numpy as np
##making empty directories
import os
os.makedirs('r_data',exist_ok=True)
os.makedirs('r_savedir',exist_ok=True)
#Preparing the dataset
# Load MNIST and flatten each 28x28 image into 784 columns named col_0..col_783.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train_ = pd.DataFrame(x_train.reshape(60000,-1),columns = ['col_'+str(i) for i in range(28*28)])
x_test_ = pd.DataFrame(x_test.reshape(10000,-1),columns = ['col_'+str(i) for i in range(28*28)])
# Add two random categorical columns. The test split draws from one extra
# value ('j') that never appears in the training split.
x_train_['col_cat1'] = [np.random.choice(['a','b','c','d','e','f','g','h','i']) for i in range(x_train_.shape[0])]
x_test_['col_cat1'] = [np.random.choice(['a','b','c','d','e','f','g','h','i','j']) for i in range(x_test_.shape[0])]
x_train_['col_cat2'] = [np.random.choice(['a','b','c','d','e','f','g','h','i']) for i in range(x_train_.shape[0])]
x_test_['col_cat2'] = [np.random.choice(['a','b','c','d','e','f','g','h','i','j']) for i in range(x_test_.shape[0])]
# Blank out roughly 5% of all cells (random boolean mask, p=0.05) with NaN.
x_train_[np.random.choice([True,False],size = x_train_.shape,p=[0.05,0.95]).reshape(x_train_.shape)] = np.nan
x_test_[np.random.choice([True,False],size = x_test_.shape,p=[0.05,0.95]).reshape(x_test_.shape)] = np.nan
# Write features and labels to CSV files under r_data/.
x_train_.to_csv('r_data/x_train.csv',index=False)
x_test_.to_csv('r_data/x_test.csv',index=False)
pd.DataFrame(y_train).to_csv('r_data/y_train.csv',index=False)
pd.DataFrame(y_test).to_csv('r_data/y_test.csv',index=False)
#**THE MAIN LAYER THAT WE ARE TALKING ABOUT**
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow import feature_column
import os
class NUM_TO_DENSE(layers.Layer):
    """Impute non-finite numeric inputs with a mean and emit finite-value flags.

    For every key in ``num_cols`` the output dict holds the imputed column
    under the same key, plus a float column ``'<key>__nullcol'`` that is 1.0
    where the input value was finite and 0.0 otherwise.

    Args:
        num_cols: List of numeric column names to read from the input dict.
    """

    def __init__(self, num_cols):
        super().__init__()
        self.keys = num_cols
        self.keys_all = self.keys + [str(i) + '__nullcol' for i in self.keys]

    # def get_config(self):
    #     config = super().get_config().copy()
    #     config.update({
    #         'keys': self.keys,
    #         'keys_all': self.keys_all,
    #     })
    #     return config

    def build(self, input_shape):
        """Create the per-column running sums of batch means and the step counter."""
        def create_moving_mean_vars():
            return tf.Variable(initial_value=0., shape=(), dtype=tf.float32, trainable=False)

        self.moving_means_total = {t: create_moving_mean_vars() for t in self.keys}
        self.layer_global_counter = tf.Variable(
            initial_value=0., shape=(), dtype=tf.float32, trainable=False)

    def call(self, inputs, training=True):
        """Return the imputed columns and their finite-value flags as a dict.

        In training mode non-finite values are replaced by the current batch
        mean, which is also added to the running sum. Otherwise they are
        replaced by the running sum divided by the number of training calls.
        """
        # True where the value is finite (i.e. present).
        finite_mask = {k: tf.math.is_finite(inputs[k]) for k in self.keys}

        def compute_update_current_means(t):
            # Mean over the finite entries only; 0 when there are none.
            current_mean = tf.math.divide_no_nan(
                tf.reduce_sum(tf.where(finite_mask[t], inputs[t], 0.), axis=0),
                tf.reduce_sum(tf.cast(finite_mask[t], tf.float32), axis=0))
            self.moving_means_total[t].assign_add(current_mean)
            return current_mean

        if training:
            current_means = {t: compute_update_current_means(t) for t in self.keys}
            outputs = {t: tf.where(finite_mask[t], inputs[t], current_means[t])
                       for t in self.keys}
            self.layer_global_counter.assign_add(1.)
        else:
            # divide_no_nan avoids 0/0 = NaN when no training step has run yet.
            outputs = {t: tf.where(finite_mask[t], inputs[t],
                                   tf.math.divide_no_nan(self.moving_means_total[t],
                                                         self.layer_global_counter))
                       for t in self.keys}
        outputs.update({str(k) + '__nullcol': tf.cast(finite_mask[k], tf.float32)
                        for k in self.keys})
        return outputs
class PREPROCESS_MONSOON(layers.Layer):
    """Turn a dict of raw numeric and categorical columns into one dense tensor.

    Numeric columns go through ``NUM_TO_DENSE``; each categorical column gets
    its own embedding via ``DenseFeatures``. All results are concatenated
    along axis 1.
    """

    def __init__(self, cat_cols_with_unique_values, num_cols):
        '''cat_cols_with_unqiue_values: (dict) {'col_cat':[unique_values_list]}
        num_cols: (list) [num_cols_name_list]'''
        super().__init__()
        self.cat_cols = cat_cols_with_unique_values
        self.num_cols = num_cols

    # def get_config(self):
    #     config = super().get_config().copy()
    #     config.update({
    #         'cat_cols': self.cat_cols,
    #         'num_cols': self.num_cols,
    #     })
    #     return config

    def build(self, input_shape):
        """Create the numeric sub-layer and one embedding layer per categorical column."""
        self.ntd = NUM_TO_DENSE(self.num_cols)
        self.num_colnames = self.ntd.keys_all

        embedding_layers = {}
        for k, v in self.cat_cols.items():
            # Embedding size is ceil(ln(number of unique values)), floored at
            # 1 because a single-value vocabulary would otherwise yield 0.
            dimension = max(1, int(tf.cast(
                tf.math.ceil(tf.math.log(tf.cast(len(self.cat_cols[k]), tf.float32))),
                tf.int32).numpy()))
            embedding_layers[k] = layers.DenseFeatures(
                feature_column.embedding_column(
                    feature_column.categorical_column_with_vocabulary_list(k, v),
                    dimension))
        self.ctd = embedding_layers

        self.cat_colnames = [i for i in self.cat_cols]
        self.dense_colnames = self.num_colnames + self.cat_colnames

    def call(self, inputs, training=True):
        """Return the concatenated numeric and embedded categorical features."""
        dense_num_d = self.ntd(inputs, training=training)
        dense_cat_d = {k: self.ctd[k](inputs) for k in self.cat_colnames}
        dense_num = tf.stack([dense_num_d[k] for k in self.num_colnames], axis=1)
        dense_cat = tf.concat([dense_cat_d[k] for k in self.cat_colnames], axis=1)
        dense_all = tf.concat([dense_num, dense_cat], axis=1)
        return dense_all
##Inputs
label_path = 'r_data/y_train.csv'
data_path = 'r_data/x_train.csv'
max_epochs = 100
batch_size = 32
shuffle_seed = 42

##Creating layer inputs
# Read a single row to discover column names and dtypes.
dfs = pd.read_csv(data_path, nrows=1)
cdtypes_x = dfs.dtypes
nc = list(dfs.select_dtypes(include=[int, float]).columns)  # numeric columns
oc = list(dfs.select_dtypes(exclude=[int, float]).columns)  # all other columns
cdtypes_y = pd.read_csv(label_path, nrows=1).dtypes
# Read the non-numeric columns in full to collect their unique (non-NaN) values.
dfc = pd.read_csv(data_path, usecols=oc)
ccwuv = {i: list(pd.Series(dfc[i].unique()).dropna()) for i in dfc.columns}
preds_name = pd.read_csv(label_path, nrows=1).columns

##creating datasets
# NOTE(review): features and labels are shuffled by two separate readers;
# rows stay aligned only while both use the same seed and buffer size.
dataset = tf.data.experimental.make_csv_dataset(
    'r_data/x_train.csv', batch_size, column_names=cdtypes_x.index, prefetch_buffer_size=1,
    shuffle=True, shuffle_buffer_size=10000, shuffle_seed=shuffle_seed)
labels = tf.data.experimental.make_csv_dataset(
    'r_data/y_train.csv', batch_size, column_names=cdtypes_y.index, prefetch_buffer_size=1,
    shuffle=True, shuffle_buffer_size=10000, shuffle_seed=shuffle_seed)
dataset = tf.data.Dataset.zip((dataset, labels))

##CREATING NETWORK
p = PREPROCESS_MONSOON(cat_cols_with_unique_values=ccwuv, num_cols=nc)
# One scalar Keras input per column: float32 for numeric, string for categorical.
indict = {}
for i in nc:
    indict[i] = k.Input(shape=(), name=i, dtype=tf.float32)
for i in ccwuv:
    indict[i] = k.Input(shape=(), name=i, dtype=tf.string)
x = p(indict)
x = l.BatchNormalization()(x)
x = l.Dense(10, activation='relu', name='dense_1')(x)
predictions = l.Dense(10, activation=None, name=preds_name[0])(x)
model = k.Model(inputs=indict, outputs=predictions)

##Compiling model
model.compile(optimizer=k.optimizers.Adam(),
              loss=k.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['sparse_categorical_accuracy'])

##callbacks
log_dir = './tensorboard_dir/no_config'
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

## Fit model on training data
history = model.fit(dataset,
                    batch_size=64,
                    epochs=30,
                    steps_per_epoch=5,
                    validation_split=0.,
                    callbacks=[tensorboard_callback])

#saving the model
tf.saved_model.save(model, 'r_savedir')

#loading the model
model = tf.saved_model.load('r_savedir')

##Predicting on loaded model
# Run the loaded model on the first batch only.
for i in dataset:
    print(model(i[0], training=False))
    break
I have commented out the part from the code where i override the config files in my custom layers and you can comment them in and the Warning about the layers not being serializable would go away.
Question:
Do i or do i not need to override the config method in order to make a custom layer that accepts arguments in __init__ serializable?
Thank you in advance for help
You must add 'get_config' to your code
def get_config(self):
    """Return the layer configuration taken from the base class.

    Overriding this method is what removes the "must override `get_config`"
    warning. Any constructor arguments the layer needs in order to be
    re-created have to be added to the returned dict by the layer itself.
    """
    config = super().get_config()
    return config
The NUM_TO_DENSE class must be like this
class NUM_TO_DENSE(layers.Layer):
    """Numeric preprocessing layer with a serializable configuration.

    Args:
        num_cols: List of numeric column names.
        **kwargs: Standard ``Layer`` arguments (``name``, ``dtype``, ...).
            Accepting them lets ``from_config`` rebuild the layer from the
            dict returned by ``get_config``.
    """

    def __init__(self, num_cols, **kwargs):
        super().__init__(**kwargs)
        self.keys = num_cols
        self.keys_all = self.keys + [str(i) + '__nullcol' for i in self.keys]

    def get_config(self):
        """Return the base config plus the constructor argument ``num_cols``."""
        config = super().get_config()
        # `keys_all` is derived from `num_cols` in __init__, so only the
        # constructor argument needs to be stored.
        config.update({'num_cols': self.keys})
        return config

ValueError: No gradients provided for any variable tensorflow 2.0

I am using tensorflow 2.0 and trying to make a actor critic algorithm to play the game of cartpole. I have done everything right but getting the following error: ValueError: No gradients provided for any variable: ['dense/kernel:0', 'dense/bias:0', 'dense_1/kernel:0', 'dense_1/bias:0'].
Please help me out
Here is my code:
import gym
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Number of episodes passed to train().
MAX_EPISODES = 2000
# Discount factor used in the TD error.
GAMMA = 0.9
# Learning rates for the actor and critic optimizers.
LR_A = 0.001
LR_C = 0.01
env = gym.make("CartPole-v0")
N_ACTIONS = env.action_space.n
# Length of the state vector; states are reshaped to (1, 4) elsewhere.
N_FEATURES = 4
def make_actor(n_features, n_actions):
    """Build the policy network: state -> softmax distribution over actions.

    Args:
        n_features: Size of the state vector.
        n_actions: Number of discrete actions.

    Returns:
        A ``tf.keras.Model`` with one 20-unit ReLU hidden layer.
    """
    inputs = tf.keras.Input(shape=[n_features])
    hidden = tf.keras.layers.Dense(20, activation=tf.nn.relu)(inputs)
    dist = tf.keras.layers.Dense(n_actions, activation=tf.nn.softmax)(hidden)
    model = tf.keras.Model(inputs=inputs, outputs=dist)
    return model
def make_critic(n_features):
    """Build the value network: state -> single scalar value.

    Args:
        n_features: Size of the state vector.

    Returns:
        A ``tf.keras.Model`` with one 20-unit ReLU hidden layer and a linear
        one-unit output.
    """
    inputs = tf.keras.Input(shape=[n_features])
    hidden = tf.keras.layers.Dense(20, activation=tf.nn.relu)(inputs)
    value = tf.keras.layers.Dense(1)(hidden)
    model = tf.keras.Model(inputs=inputs, outputs=value)
    return model
# Instantiate both networks and print their layer summaries.
actor = make_actor(N_FEATURES, N_ACTIONS)
critic = make_critic(N_FEATURES)
actor.summary()
critic.summary()
# Separate Adam optimizers, one per network, each with its own learning rate.
actor_optimizer = tf.keras.optimizers.Adam(LR_A)
critic_optimizer = tf.keras.optimizers.Adam(LR_C)
def loss_actor(s, a, td_error):
    """Return the negated ``log pi(a|s) * td_error`` as a tensor.

    Args:
        s: State as a NumPy array with 4 elements.
        a: Index of the action that was taken.
        td_error: TD error as a NumPy value.

    NOTE(review): ``.numpy()`` and the ``np.*`` calls leave TensorFlow, so a
    ``GradientTape`` records no path from the actor's variables to the
    returned value. This matches the reported "No gradients provided" error;
    the computation is kept exactly as posted.
    """
    dist = actor(s.reshape(1, 4)).numpy()
    log_prob = np.log(dist[0, a])
    exp_v = np.mean(log_prob * td_error)
    return tf.multiply(exp_v, -1)
def loss_critic(s, s_, r, gamma):
    """Return the TD error ``r + gamma * V(s_) - V(s)`` as a tensor.

    Args:
        s: Current state (1-D NumPy array).
        s_: Next state (1-D NumPy array).
        r: Reward received for the transition.
        gamma: Discount factor.

    NOTE(review): the raw TD error is returned and used as the loss by the
    caller; it is not squared here — confirm that is intended.
    """
    # Add a leading batch axis so the critic sees shape (1, n_features).
    s, s_ = s[np.newaxis, :], s_[np.newaxis, :]
    v = critic(s)
    v_ = critic(s_)
    td_error = r + gamma * v_ - v
    return tf.multiply(td_error, 1)
def train(max_episodes):
    """Run ``max_episodes`` episodes, updating critic and actor at every step.

    Args:
        max_episodes: Number of episodes to play.
    """
    for episode in range(max_episodes):
        s = env.reset().astype(np.float32)
        t = 0
        track_r = []
        while True:
            # Sample an action from the actor's current distribution.
            dist = actor(s.reshape(1, 4)).numpy()
            a = np.random.choice(range(N_ACTIONS), p=dist.ravel())
            s_, r, done, info = env.step(a)
            s_ = s_.astype(np.float32)
            # Penalise the terminating step.
            if done:
                r = -20
            track_r.append(r)

            # Critic update. The second tape that used to be opened here was
            # never used and has been dropped.
            with tf.GradientTape() as cri_tape:
                td_error = loss_critic(s, s_, r, GAMMA)
            gradient = cri_tape.gradient(td_error, critic.trainable_variables)
            critic_optimizer.apply_gradients(zip(gradient, critic.trainable_variables))

            # Actor update.
            with tf.GradientTape() as act_tape:
                neg_exp_v = loss_actor(s, a, td_error.numpy())
            # NOTE(review): the gradient is taken w.r.t. the *critic's*
            # variables but applied to the actor's below, and `loss_actor`
            # computes its value in NumPy. Either is enough to produce the
            # reported "No gradients provided" error; kept as posted.
            gradient = act_tape.gradient(neg_exp_v, critic.trainable_variables)
            actor_optimizer.apply_gradients(zip(gradient, actor.trainable_variables))

            s = s_
            t += 1
            if done:
                print("Episode:{} Steps:{}".format(episode+1, t))
                # Leave the step loop so the next episode can start.
                break
train(MAX_EPISODES)
The error is on line 69:actor_optimizer.apply_gradients(zip(gradient, actor.trainable_variables))
When I tried to print out the gradients for the actor the result was None.
I am really not getting where the problem is.

Tensorflow: Calling externally set-up function in `tf.scan` (e.g using `tf.make_template`) results in error

I have a RNN like structure that has some building blocks (component neural networks) that are passed in by the user. Here is a minimal example:
import tensorflow as tf
tf.reset_default_graph()
def initialize(shape):
    """Return a float32 random-normal tensor (mean 0, stddev 0.1) of ``shape``."""
    init = tf.random_normal(shape, mean=0, stddev=0.1, dtype=tf.float32)
    return init
def test_rnn_with_external(input, hiddens, external_fct):
    """
    A simple rnn that makes the standard update, then
    feeds the new hidden state through some external
    function.

    Args:
        input: Tensor scanned along its first axis; axis 1 is read as the
            batch size and the last axis as the input dimension.
        hiddens: Number of hidden units.
        external_fct: Callable applied to the hidden state at every step.

    Returns:
        The tensor of hidden states produced by ``tf.scan``.
    """
    dim_in = input.get_shape().as_list()[-1]
    btsz = input.get_shape().as_list()[1]
    shape = (dim_in + hiddens, hiddens)
    _init = initialize(shape)
    W = tf.get_variable("rnn_w", initializer=_init)
    _init = tf.zeros([hiddens])
    b = tf.get_variable("rnn_b", initializer=_init)

    def _step(previous, input):
        # Concatenate the current input and the previous state along axis 1.
        concat = tf.concat(1, [input, previous])
        h_t = tf.tanh(tf.add(tf.matmul(concat, W), b))
        h_t = external_fct(h_t)
        return h_t

    h_0 = tf.zeros([btsz, hiddens])
    states = tf.scan(_step,
                     input,
                     initializer=h_0,
                     name="states")
    return states
# the external function, relying on the templating mechanism.
def ext_fct(hiddens):
    """Return a template computing ``input @ W`` with a shared variable ``ext_w``.

    Args:
        hiddens: Side length of the square weight matrix.
    """
    def tmp(input):
        shape = (hiddens, hiddens)
        # NOTE(review): this initializer tensor is created when the template
        # is first called, i.e. inside the `tf.scan` step — presumably what
        # the "must be from the same frame" error refers to.
        _init = initialize(shape)
        W = tf.get_variable("ext_w", initializer=_init)
        b = 0
        return tf.add(tf.matmul(input, W), b, name="external")
    return tf.make_template(name_="external_fct", func_=tmp)
# run from here on
# Sizes: t time steps, batch size btsz, input dimension dim, hidden units.
t = 5
btsz = 4
dim = 2
hiddens = 3
x = tf.placeholder(tf.float32, shape=(t, btsz, dim))
# Build the templated external function and the RNN graph around it.
ext = ext_fct(hiddens)
states = test_rnn_with_external(x, hiddens, external_fct=ext)
sess = tf.InteractiveSession()
sess.run(tf.initialize_all_variables())
with the error ending in:
InvalidArgumentError: All inputs to node external_fct/ext_w/Assign must be from the same frame.
With Frame, I would associate an area on the stack. So I thought that maybe tf.make_template does something very weird, and thus it is not usable here. The external function can be rewritten a bit and then called more directly, like so:
import tensorflow as tf
tf.reset_default_graph()
def initialize(shape):
    """Return a float32 random-normal tensor (mean 0, stddev 0.1) of ``shape``."""
    init = tf.random_normal(shape, mean=0, stddev=0.1, dtype=tf.float32)
    return init
def test_rnn_with_external(input, hiddens, external_fct):
    """Simple RNN whose new hidden state is passed through ``external_fct``.

    Args:
        input: Tensor scanned along its first axis; axis 1 is read as the
            batch size and the last axis as the input dimension.
        hiddens: Number of hidden units.
        external_fct: Callable invoked as ``external_fct(h_t, hiddens)`` at
            every step.

    Returns:
        The tensor of hidden states produced by ``tf.scan``.
    """
    dim_in = input.get_shape().as_list()[-1]
    btsz = input.get_shape().as_list()[1]
    shape = (dim_in + hiddens, hiddens)
    _init = initialize(shape)
    W = tf.get_variable("rnn_w", initializer=_init)
    _init = tf.zeros([hiddens])
    b = tf.get_variable("rnn_b", initializer=_init)

    def _step(previous, input):
        """Compute the next hidden state from the previous one and the input."""
        concat = tf.concat(1, [input, previous])
        h_t = tf.tanh(tf.add(tf.matmul(concat, W), b))
        h_t = external_fct(h_t, hiddens)
        return h_t

    h_0 = tf.zeros([btsz, hiddens])
    states = tf.scan(_step,
                     input,
                     initializer=h_0,
                     name="states")
    return states
def ext_fct_new(input, hiddens):
    """Return ``input @ W`` where ``W`` is the variable ``ext_w_new``.

    Args:
        input: Tensor to transform.
        hiddens: Side length of the square weight matrix.
    """
    shape = (hiddens, hiddens)
    # NOTE(review): as in the templated version, the initializer tensor is
    # created inside the `tf.scan` step — presumably the source of the
    # "must be from the same frame" error.
    _init = initialize(shape)
    W = tf.get_variable("ext_w_new", initializer=_init)
    b = 0
    return tf.add(tf.matmul(input, W), b, name="external_new")
# Sizes: t time steps, batch size btsz, input dimension dim, hidden units.
t = 5
btsz = 4
dim = 2
hiddens = 3
x = tf.placeholder(tf.float32, shape=(t, btsz, dim))
# Build the RNN graph, passing the external function directly (no template).
states = test_rnn_with_external(x, hiddens, external_fct=ext_fct_new)
sess = tf.InteractiveSession()
sess.run(tf.initialize_all_variables())
However, still the same error InvalidArgumentError: All inputs to node ext_w_new/Assign must be from the same frame.
Of course, moving contents of the external function into the _step part (and tf.get_variableing before) works. But then the flexibility (necessary in the original code) is gone.
What am I doing wrong? Any help/tips/pointers is greatly appreciated.
(Note: Asked this on github, too: https://github.com/tensorflow/tensorflow/issues/4478)
Using a tf.constant_initializer solves the problem. This is described here.