I am getting this error in TensorFlow while using GradientTape on the function below. I have tried everything, but I am still not getting any output.
with tf.GradientTape() as tape:
    critic_value = -agent.critic(state_batch, old_actions)
    actor_loss = tf.math.reduce_mean(critic_value)
actor_grad = tape.gradient(actor_loss, agent.actor.trainable_variables)
agent.actor.optimizer.apply_gradients(zip(actor_grad, agent.actor.trainable_variables))
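For reference, in the usual DDPG-style actor update the actions are recomputed by the actor inside the tape; if old_actions were produced outside the tape, there is no gradient path from actor_loss back to the actor's variables. A minimal sketch of that pattern (assuming agent.actor can be called on state_batch):

with tf.GradientTape() as tape:
    # Recompute the actions inside the tape so tape.gradient can trace
    # a path from actor_loss back to the actor's variables.
    actions = agent.actor(state_batch)
    critic_value = -agent.critic(state_batch, actions)
    actor_loss = tf.math.reduce_mean(critic_value)
actor_grad = tape.gradient(actor_loss, agent.actor.trainable_variables)
agent.actor.optimizer.apply_gradients(zip(actor_grad, agent.actor.trainable_variables))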
I am trying to convert a TensorFlow (1.15) model to a PyTorch model. Since I was getting very different loss values, I tried comparing the output of the LSTM in the forward pass for the same input. The declaration and initialization of the LSTM are given below:
TensorFlow code
rnn_cell_video_fw = tf.contrib.rnn.LSTMCell(
    num_units=self.options['rnn_size'],
    state_is_tuple=True,
    initializer=tf.orthogonal_initializer()
)
rnn_cell_video_fw = tf.contrib.rnn.DropoutWrapper(
    rnn_cell_video_fw,
    input_keep_prob=1.0 - rnn_drop,
    output_keep_prob=1.0 - rnn_drop
)
sequence_length = tf.expand_dims(tf.shape(video_feat_fw)[1], axis=0)
initial_state = rnn_cell_video_fw.zero_state(batch_size=batch_size, dtype=tf.float32)
rnn_outputs_fw, _ = tf.nn.dynamic_rnn(
    cell=rnn_cell_video_fw,
    inputs=video_feat_fw,
    sequence_length=sequence_length,
    initial_state=initial_state,
    dtype=tf.float32
)
PyTorch code
self.rnn_video_fw = nn.LSTM(self.options['video_feat_dim'], self.options['rnn_size'], dropout=self.options['rnn_drop'])
rnn_outputs_fw, _ = self.rnn_video_fw(video_feat_fw)
Initialization for LSTM in train.py
def init_weight(m):
    if type(m) in [nn.LSTM]:
        for param in m.parameters():
            nn.init.orthogonal_(m.weight_hh_l0)
            nn.init.orthogonal_(m.weight_ih_l0)
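Two things in this setup are worth double-checking (a sketch, not a confirmed fix). First, the loop above ignores param and re-initializes the same two tensors once per parameter; iterating over named parameters initializes every weight matrix exactly once:

import torch.nn as nn

def init_weight(m):
    # Orthogonally initialize each LSTM weight matrix exactly once,
    # instead of re-initializing weight_hh_l0/weight_ih_l0 in a loop.
    if isinstance(m, nn.LSTM):
        for name, param in m.named_parameters():
            if 'weight' in name:
                nn.init.orthogonal_(param)

Second, nn.LSTM's dropout argument only applies between stacked layers, so with a single layer it is a no-op, unlike TF's DropoutWrapper, which masks the inputs and outputs. Also note that tf.contrib.rnn.LSTMCell orthogonalizes one fused [input + hidden, 4 * hidden] kernel with gate order i, j, f, o, while PyTorch keeps separate weight_ih/weight_hh matrices with gate order i, f, g, o, so even matching initializers will not produce numerically identical outputs.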
[Screenshot: the output for TensorFlow]
[Screenshot: the output for PyTorch]
The same is true for pretty much every data item, and my PyTorch model isn't converging. Is my suspicion correct that the difference in LSTM output is the reason? If so, where am I going wrong?
Link to the paper
Link to TF code
Let me know if anything else is required.
I use tf.contrib.image.rotate in my loss function, and an error occurs:
No gradients provided for any variable, check your graph for ops that do not support gradients,
My code is:
import tensorflow as tf
image_tensor = tf.placeholder(dtype=tf.float32, shape=[None, 320, 320, 1])
target_tensor = tf.placeholder(dtype=tf.float32, shape=[None, 320, 320, 1])
s = tf.concat([image_tensor, target_tensor], axis=3)
s = tf.layers.flatten(s)
# 320 * 320 * 2 = 204800 features after flattening.
w = tf.get_variable(initializer=tf.truncated_normal([204800, 1], stddev=0.1), name='w')
b = tf.get_variable(initializer=tf.truncated_normal([1], stddev=0.1), name='b')
a = tf.matmul(s, w) + b
diff = tf.contrib.image.rotate(image_tensor, a[:, 0], interpolation='BILINEAR') - target_tensor
loss = tf.reduce_sum(tf.square(diff))
optimizer = tf.train.GradientDescentOptimizer(0.001)
train = optimizer.minimize(loss)
My TensorFlow version is 1.4.0, and my computer runs Windows 10.
By the way, how can I rotate a 3D image in TensorFlow? tf.contrib.image.rotate only works for 2D images.
It turns out tf.contrib.image.rotate cannot be optimized. I found a useful implementation of a spatial transformer network (https://github.com/kevinzakka/spatial-transformer-network).
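For reference, with such a spatial transformer the predicted angle stays inside the differentiable graph because the rotation is expressed as an affine transform. A rough sketch (assuming the transformer function from the linked repository, which takes a [batch, 6] affine parameter tensor):

angle = a[:, 0]                             # predicted rotation per image
cos, sin = tf.cos(angle), tf.sin(angle)
zeros = tf.zeros_like(angle)
# Row-major 2x3 rotation matrix for each batch element.
theta = tf.stack([cos, -sin, zeros,
                  sin,  cos, zeros], axis=1)
rotated = transformer(image_tensor, theta)  # differentiable w.r.t. a
diff = rotated - target_tensor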
For a deep learning model I defined with TF 2.0 Keras, I need to write a custom loss function.
As this will depend on things like entropy and the normal log_prob, it would really make my life less miserable if I could use tf.distributions.Normal, with two model outputs as mu and sigma respectively.
However, as soon as I put this into my loss function, I get the Keras error that no gradient is defined for this function.
ValueError: An operation has `None` for gradient. Please make sure that all of your ops have a gradient defined (i.e. are differentiable). Common ops without gradient: K.argmax, K.round, K.eval.
I tried encapsulating the call in a tf.contrib.eager.Variable, as I read somewhere. That did not help.
What is the trick to using them? I don't see a fundamental architectural reason why I should not be able to use them in mixed form.
# This is just an example which does not really give a meaningful result.
import tensorflow as tf
import tensorflow.keras as K
import numpy as np

def custom_loss_fkt(extra_output):
    def loss(y_true, y_pred):
        dist = tf.distributions.Normal(loc=y_pred, scale=extra_output)
        d = dist.entropy()
        return K.backend.mean(d)
    return loss

input_node = K.layers.Input(shape=(1,))
dense = K.layers.Dense(8, activation='relu')(input_node)
# dense = K.layers.Dense(4, activation='relu')(dense)
out1 = K.layers.Dense(4, activation='linear')(dense)
out2 = K.layers.Dense(4, activation='linear')(dense)
model = K.Model(inputs=input_node, outputs=[out1, out2])
model.compile(optimizer='adam', loss=[custom_loss_fkt(out2), custom_loss_fkt(out1)])
model.summary()
x = np.zeros((1, 1))
y1 = np.array([[0., 0.1, 0.2, 0.3]])
y2 = np.array([[0.1, 0.1, 0.1, 0.1]])
model.fit(x, [y1, y2], epochs=1000, verbose=0)
print(model.predict(x))
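One way to check whether the distribution object itself is the problem (a sketch, not a confirmed fix): the entropy of a Normal has the closed form 0.5 * log(2 * pi * e * sigma^2), which depends only on sigma, so the same loss can be written with plain differentiable tensor ops:

import numpy as np
import tensorflow as tf

def entropy_loss_fkt(extra_output):
    def loss(y_true, y_pred):
        # Closed-form Normal entropy; mu (y_pred) does not enter it.
        ent = 0.5 * tf.math.log(2.0 * np.pi * np.e * tf.square(extra_output))
        return tf.reduce_mean(ent)
    return loss

This would be passed to model.compile in place of custom_loss_fkt above; if gradients flow with this version, the problem is isolated to the distribution op.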
I have a pre-trained Keras Sequential model called agent, and I'm trying to fine-tune it with a loss function.
json_file = open('model/prior_model_RMSprop.json', 'r')
json_model = json_file.read()
json_file.close()
agent = model_from_json(json_model)
prior = model_from_json(json_model)
# load weights into model
agent.load_weights('model/model_RMSprop.h5')
prior.load_weights('model/model_RMSprop.h5')
agent_output = agent.output
prior_output = prior.output
loss = tf.reduce_mean(tf.square(agent_output - prior_output))
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)
So far, everything works fine. However, when I add some basic TensorFlow operations, an error occurs:
agent_logits = tf.cast(tf.argmax(agent_output, axis=2), dtype=tf.float32)
prior_logits = tf.cast(tf.argmax(prior_output, axis=2), dtype=tf.float32)
loss = tf.reduce_mean(tf.square(agent_logits - prior_logits))
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)
ValueError: No gradients provided for any variable
So do these TensorFlow operations break the connection between the model and the loss function? I've been stuck here for almost two weeks, so please help. I'm also not clear on how to update a Keras model's trainable weights with the loss function I defined. Any hints or related links would be appreciated!
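For context on why this fails: tf.argmax is piecewise constant, so its gradient is None everywhere, and tf.cast does not change that. A common differentiable stand-in, shown here as an illustrative sketch rather than a drop-in fix, is to compare soft probabilities instead of hard indices:

# Softmax keeps the graph differentiable, unlike argmax.
agent_probs = tf.nn.softmax(agent_output, axis=2)
prior_probs = tf.nn.softmax(prior_output, axis=2)
loss = tf.reduce_mean(tf.square(agent_probs - prior_probs))
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)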
Assume I have the following loss function:
loss_a = tf.reduce_mean(my_loss_fn(model_output, targets))
loss_b = tf.reduce_mean(my_other_loss_fn(model_output, targets))
loss_final = loss_a + tf.multiply(alpha, loss_b)
To visualize the norm of the gradients w.r.t. loss_final, one could do this:
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
grads_and_vars = optimizer.compute_gradients(loss_final)
grads, _ = list(zip(*grads_and_vars))
norms = tf.global_norm(grads)
gradnorm_s = tf.summary.scalar('gradient norm', norms)
train_op = optimizer.apply_gradients(grads_and_vars, name='train_op')
However, I would like to plot the norms of the gradients w.r.t. loss_a and loss_b separately. What is the most efficient way to do this? Do I have to call compute_gradients(..) on both loss_a and loss_b separately and then add those two gradients together before passing them to optimizer.apply_gradients(..)? I know this would be mathematically correct due to the sum rule, but it seems a bit cumbersome, and I don't know how to implement the summation of the gradients correctly. Also, loss_final is rather simple here because it's just a sum; what if loss_final were more complicated, e.g. a division?
I'm using Tensorflow 0.12.
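For completeness, the manual summation described above would look roughly like this (a sketch assuming both compute_gradients calls return variables in the same order and no gradient is None):

grads_a = optimizer.compute_gradients(loss_a)
grads_b = optimizer.compute_gradients(loss_b)
# loss_final = loss_a + alpha * loss_b, so by the sum rule the combined
# gradient for each variable is g_a + alpha * g_b.
combined = [(ga + tf.multiply(alpha, gb), va)
            for (ga, va), (gb, _) in zip(grads_a, grads_b)]
train_op = optimizer.apply_gradients(combined, name='train_op')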
You are right that combining gradients could get messy. Instead, just compute the gradients of each of the losses as well as the final loss. Because TensorFlow optimizes the directed acyclic graph (DAG) before execution, this doesn't result in duplicated work.
For example:
import tensorflow as tf

with tf.name_scope('inputs'):
    W = tf.Variable(dtype=tf.float32, initial_value=tf.random_normal((4, 1), dtype=tf.float32), name='W')
    x = tf.random_uniform((6, 4), dtype=tf.float32, name='x')

with tf.name_scope('outputs'):
    y = tf.matmul(x, W, name='y')

def my_loss_fn(output, targets, name):
    return tf.reduce_mean(tf.abs(output - targets), name=name)

def my_other_loss_fn(output, targets, name):
    return tf.sqrt(tf.reduce_mean((output - targets) ** 2), name=name)

def get_tensors(loss_fn):
    loss = loss_fn(y, targets, 'loss')
    grads = tf.gradients(loss, W, name='gradients')
    norm = tf.norm(grads, name='norm')
    return loss, grads, norm

targets = tf.random_uniform((6, 1))
with tf.name_scope('a'):
    loss_a, grads_a, norm_a = get_tensors(my_loss_fn)
with tf.name_scope('b'):
    loss_b, grads_b, norm_b = get_tensors(my_other_loss_fn)

with tf.name_scope('combined'):
    loss = tf.add(loss_a, loss_b, name='loss')
    grad = tf.gradients(loss, W, name='gradients')

with tf.Session() as sess:
    tf.global_variables_initializer().run(session=sess)
    writer = tf.summary.FileWriter('./tensorboard_results', sess.graph)
    res = sess.run([norm_a, norm_b, grad])
    print(*res, sep='\n')
Edit: In response to your comment... You can inspect the DAG of a TensorFlow model using TensorBoard. I've updated the code above to store the graph.
Run tensorboard --logdir $PWD/tensorboard_results in a terminal and navigate to the URL printed on the command line (typically http://localhost:6006/). Then click on the GRAPH tab to view the DAG. You can recursively expand the tensors, ops, and namespaces to see subgraphs, individual operations, and their inputs.