Compute gradient norm of each part of composite loss function - tensorflow

Assume I have the following loss function:
loss_a = tf.reduce_mean(my_loss_fn(model_output, targets))
loss_b = tf.reduce_mean(my_other_loss_fn(model_output, targets))
loss_final = loss_a + tf.multiply(alpha, loss_b)
To visualize the norm of the gradients w.r.t to loss_final one could do this:
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
grads_and_vars = optimizer.compute_gradients(loss_final)
grads, _ = list(zip(*grads_and_vars))
norms = tf.global_norm(grads)
gradnorm_s = tf.summary.scalar('gradient norm', norms)
train_op = optimizer.apply_gradients(grads_and_vars, name='train_op')
However, I would like to plot the norm of the gradients w.r.t to loss_a and loss_b separately. How can I do this in the most efficient way? Do I have to call compute_gradients(..) on both loss_a and loss_b separately and then add those two gradients together before passing them to optimizer.apply_gradients(..)? I know that this would mathematically be correct due to the summation rule, but it just seems a bit cumbersome and I also don't know how you would implement the summation of the gradients correctly. Also, loss_final is rather simple, because it's just a summation. What if loss_final was more complicated, e.g. a division?
I'm using Tensorflow 0.12.

You are right that combining gradients could get messy. Instead just compute the gradients of each of the losses as well as the final loss. Because tensorflow optimizes the directed acyclic graph (DAG) before compilation, this doesn't result in duplication of work.
For example:
import tensorflow as tf
with tf.name_scope('inputs'):
W = tf.Variable(dtype=tf.float32, initial_value=tf.random_normal((4, 1), dtype=tf.float32), name='W')
x = tf.random_uniform((6, 4), dtype=tf.float32, name='x')
with tf.name_scope('outputs'):
y = tf.matmul(x, W, name='y')
def my_loss_fn(output, targets, name):
return tf.reduce_mean(tf.abs(output - targets), name=name)
def my_other_loss_fn(output, targets, name):
return tf.sqrt(tf.reduce_mean((output - targets) ** 2), name=name)
def get_tensors(loss_fn):
loss = loss_fn(y, targets, 'loss')
grads = tf.gradients(loss, W, name='gradients')
norm = tf.norm(grads, name='norm')
return loss, grads, norm
targets = tf.random_uniform((6, 1))
with tf.name_scope('a'):
loss_a, grads_a, norm_a = get_tensors(my_loss_fn)
with tf.name_scope('b'):
loss_b, grads_b, norm_b = get_tensors(my_loss_fn)
with tf.name_scope('combined'):
loss = tf.add(loss_a, loss_b, name='loss')
grad = tf.gradients(loss, W, name='gradients')
with tf.Session() as sess:
tf.global_variables_initializer().run(session=sess)
writer = tf.summary.FileWriter('./tensorboard_results', sess.graph)
res = sess.run([norm_a, norm_b, grad])
print(*res, sep='\n')
Edit: In response to your comment... You can check the DAG of a tensorflow model using tensorboard. I've updated the code to store the graph.
Run tensorboard --logdir $PWD/tensorboard_results in a terminal and navigate to the url printed on the commandline (typically http://localhost:6006/). Then click on GRAPH tab to view the DAG. You can recursively expand the tensors, ops, namespaces to see subgraphs to see individual operations and their inputs.

Related

How to find input that maximizes output of a neural network using pytorch

I have a pytorch network, that have been trained and weights are updated (complete training).
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.fc1 = nn.Linear(1, H)
self.fc2 = nn.Linear(1, H)
self.fc3 = nn.Linear(H, 1)
def forward(self, x, y):
h1 = F.relu(self.fc1(x)+self.fc2(y))
h2 = self.fc3(h1)
return h2
After training, I want to maximize the output of the network with respect to input. In other words, I want to optimize the input to maximize the neural network output, without changing weights. How can I achieve that.
My trial, but it doesn't make sense:
in = torch.autograd.Variable(x)
out = Net(in)
grad = torch.autograd.grad(out, input)
Disable gradients for the network.
Set your input tensor as a parameter requiring grad.
Initialize an optimizer wrapping the input tensor.
Backprop with some loss function and a goal tensor
...
Profit!
import torch
f = torch.nn.Linear(10, 5)
f.requires_grad_(False)
x = torch.nn.Parameter(torch.rand(10), requires_grad=True)
optim = torch.optim.SGD([x], lr=1e-1)
mse = torch.nn.MSELoss()
y = torch.ones(5) # the desired network response
num_steps = 5 # how many optim steps to take
for _ in range(num_steps):
loss = mse(f(x), y)
loss.backward()
optim.step()
optim.zero_grad()
But make sure that your goal tensor is well defined wrt. the network's monotonicity, otherwise you might end up with nans.

Can't apply gradients on tf.Variable

I am trying to learn a similarity matrix(M) between two image embeddings, A single instance of training is a pair of images - (anchor, positive). So ideally the model will return 0 distance for embeddings of similar images.
The problem is, when i declare the distance matrix(M) as a tf.Variable, it returns an error
on this line
self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
TypeError: 'Variable' object is not iterable.
I think I should use a tensorflow datatype for M, that is iterable
Please tell me how I can fix this issue
import tensorflow as tf
from tensorflow import keras
# metric learning model
class MetricLearningModel:
def __init__(self, lr):
self.optimizer = keras.optimizers.Adam(lr=lr)
self.lr = lr
self.loss_object = keras.losses.MeanSquaredError()
self.trainable_variables = tf.Variable(
(tf.ones((2048, 2048), dtype=tf.float32)),
trainable=True
)
def similarity_function(self, anchor_embeddings, positive_embeddings):
M = self.trainable_variables
X_i = anchor_embeddings
X_j = positive_embeddings
similarity_value = tf.matmul(X_j, M, name='Tensor')
similarity_value = tf.matmul(similarity_value, tf.transpose(X_i), name='Tensor')
# distance(x,y) = sqrt( (x-y)#M#(x-y).T )
return similarity_value
def train_step(self, anchor, positive):
anchor_embeddings, positive_embeddings = anchor, positive
# Calculate gradients
with tf.GradientTape() as tape:
# Calculate similarity between anchors and positives.
similarities = self.similarity_function(anchor_embeddings, positive_embeddings)
y_pred = similarities
y_true = tf.zeros(1)
print(y_true, y_pred)
loss_value = self.loss_object(
y_pred=y_true,
y_true=y_pred,
)
gradients = tape.gradient(loss_value, self.trainable_variables)
# Apply gradients via optimizer
self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
metric_model = MetricLearningModel(lr=1e-3)
anchor, positive = tf.ones((1, 2048), dtype=tf.float32), tf.ones((1, 2048), dtype=tf.float32)
metric_model.train_step(anchor, positive)
The python zip function expects iterable objects, like for example a list or a tuple.
In your calls to tape.gradient, or optimizer.apply_gradients, you can put your Variable in a list to solve the issue :
with tf.GradienTape() as tape:
gradients = tape.gradient(loss_value, [self.trainable_variables])
# Apply gradients via optimizer
self.optimizer.apply_gradients(zip(gradients, [self.trainable_variables]))
tape.gradient respects the shape of the sources object passed to compute the gradients of, so if you feed it with a list, you will get a list out of it. It is stated in the documentation:
Returns
a list or nested structure of Tensors (or IndexedSlices, or None), one for each element in sources. Returned structure is the same as the structure of sources.

adding value to tensorboard for adversarial learning

I'm new to tensorboard. I have faced some problem while using it.
Problem 1 :
I'm writing an adversarial learning model. For visualizing the loss of the model I have the following loss,
actor loss
critic loss
for the learning algorithm provided in this paper,
in one(or K) batch I have to feed actor and critic both. Then I need to only feed value to the critic. This time there is no actor. I think, to show value in tensorboard I need to do following,
def model():
...
actor_loss = ...
tf.summary.scalar('actor', actor_loss)
...
critic_loss = ...
tf.summary.scalar('critic', critic_loss)
my_graph = tf.Graph()
with my_graph.as_default():
tf.reset_default_graph()
sess = tf.Session()
with sess.as_default():
model()
merged = tf.summary.merge_all()
writer = tf.summary.FileWriter(address+ '/train',
sess.graph)
init = tf.global_variables_initializer()
sess.run(init)
Now while giving input to inner_loop (where actor and critic both participated) there's no problem, we get the result by following,
a,b,c,d,summary = sess.run( [actor_train_step, critic_train_step, actor_loss, critic_loss, merged], feed_dict = feed_dict )
writer.add_summary(summary, batch)
but when we want to give input only to the critic, then the code becomes following,
a,b,summary = sess.run( [critic_train_step, critic_loss, merged], feed_dict = feed_dict )
writer.add_summary(summary, batch)
But as merged have dependency over actor_loss it cannot run. On the other side, I can't just feed value to of actor to the model. How how to solve this issue?
Problem 2
I am not evaluating (calculating the score value) the model by tensor operation. Actually, I generate the output and fed the output to another script and got the score value from there. So after each of the batch/epoch I am evaluating my model and got a score value from the script. How can I save this value to tensorboard?
I can not formalize a tf.summary.merge_all() operation before the session initialization as I am calculating the evaluation score value at the training time from outside script.
Where should I put the tf.summary.merge_all() operation?
Now if I want to combine the Problem 1 and Problem 2 to in a single project is there anything new I have to do.
Note: I'm new to tensorboard. So it will be better if you can give a detailed explanation.
Problem #1
If you only want to summary only the critic op, you should only run the summary op for the critic part instead of using tf.summary.merge_all()
For example:
def model():
...
actor_loss = ...
tf.summary.scalar('actor', actor_loss)
...
critic_loss = ...
summary_critic = tf.summary.scalar('critic', critic_loss)
a,b,summary = sess.run( [critic_train_step, critic_loss, summary_critic], feed_dict = feed_dict )
writer.add_summary(summary, batch)
Problem #2
To visualize the values you got after running the outside script. You can convert those values to tensor using tf.convert_to_tensor(), which is documented here. Then serializing that tensor to visualize it on tensorboard.
For example:
vals = output_from_outside_script()
vals_tensor = sess.run(tf.convert_to_tensor(vals))
tf.summary.scalar('evaluation', vals_tensor)
Every tf.summary operations will create a Summary protobuf which serializing your tensor to an events file. And instead of running all the summary ops, Tensorflow provide tf.summary.merger_all() to run all the summary ops in your graph.
I tried to do it in your case.
Outside script:
import numpy as np
def output_from_outside_script(var):
return np.sum(var)
Code in adversarial training:
import tensorflow as tf
import numpy as np
from outside_evaluation import *
sess = tf.Session()
x = sess.run(tf.constant([[1,2,3,4]], dtype=tf.float32))
X = tf.placeholder(dtype=tf.float32, shape=[1, 4])
W = tf.Variable(tf.truncated_normal([4, 10], stddev=0.1))
sess.run(tf.global_variables_initializer())
val = tf.matmul(a=X, b=W, name='matmul')
tf.summary.scalar('matmul_mean', tf.reduce_mean(val))
y = sess.run(val, feed_dict={X: x})
print('y = ', y)
vals = output_from_outside_script(y)
print('vals = ', vals)
vals_tensor = tf.convert_to_tensor(vals, name='vals_tensor')
tf.summary.scalar('evaluation', vals_tensor)
writer = tf.summary.FileWriter(os.path.join('test_log'), sess.graph)
merged = tf.summary.merge_all()
summary = sess.run(merged, feed_dict={X: x})
writer.add_summary(summary)
writer.close()
Output:
('y = ', array([[-0.51137048, -0.16054343, -0.03827953, 0.1124011 , 0.09200752,
-0.22235785, 0.41357356, 1.04061067, -0.08877556, -0.86647421]],
('vals = ', -0.22920817)
Tensorboard log:
Scalar:
Should there be any problem, please let me know.

Using `softmax_cross_entropy_with_logits()` with `seq2seq.sequence_loss()`

I have a working RNN using the default softmax loss function for tf.contrib.seq2seq.sequence_loss() (which I'm assuming is tf.nn.softmax()) but would instead like to use tf.nn.softmax_cross_entropy_with_logits(). According to the seq2seq.sequence_loss documentation, one may use softmax_loss_function= to override the default loss function:
softmax_loss_function: Function (labels, logits) -> loss-batch to be
used instead of the standard softmax (the default if this is None).
Note that to avoid confusion, it is required for the function to
accept named arguments.
Here is my code that works:
from tensorflow.python.layers.core import Dense
# Build the graph
train_graph = tf.Graph()
# Set the graph to default to ensure that it is ready for training
with train_graph.as_default():
# Load the model inputs
input_data, targets, keep_prob, lr, target_sequence_length, max_target_sequence_length, source_sequence_length \
= get_model_inputs()
# Create the training and inference logits
training_decoder_output, inference_decoder_output = seq2seq_model(input_data,
targets,
lr,
target_sequence_length,
max_target_sequence_length,
source_sequence_length,
len(source_letter_to_int),
len(target_letter_to_int),
encoding_embedding_size,
decoding_embedding_size,
rnn_size,
num_layers,
keep_prob)
# Create tensors for the training logits and inference logits
training_logits = tf.identity(training_decoder_output.rnn_output, 'logits')
inference_logits = tf.identity(inference_decoder_output.sample_id, name='predictions')
# Create the weights for sequence_loss
masks = tf.sequence_mask(target_sequence_length, max_target_sequence_length, dtype=tf.float32, name='masks')
with tf.name_scope("optimization"):
# Loss function
cost = tf.contrib.seq2seq.sequence_loss(training_logits, targets, masks)
# Optimizer
optimizer = tf.train.AdamOptimizer(lr)
# Gradient Clipping
gradients = optimizer.compute_gradients(cost)
capped_gradients = [(tf.clip_by_value(grad, -5., 5.), var) for grad, var in gradients if grad is not None]
train_op = optimizer.apply_gradients(capped_gradients)
# Add variables to collection in order to load them up when retraining a saved graph
tf.add_to_collection("cost", cost)
tf.add_to_collection("train_op", train_op)
My attempt to change the loss function is as follows (I've only indicated the code that is different):
with tf.name_scope("optimization"):
# One-hot encode targets and reshape to match logits, one row per batch_size per step
y_one_hot = tf.one_hot(targets, len(target_letter_to_int))
y_reshaped = tf.reshape(y_one_hot, [batch_size, len(target_letter_to_int), 30])
# Loss function
loss = tf.nn.softmax_cross_entropy_with_logits(logits=training_logits, labels=y_reshaped)
loss = tf.reduce_mean(loss)
cost = tf.contrib.seq2seq.sequence_loss(training_logits, targets, masks, softmax_loss_function=loss)
The line cost = tf.contrib.seq2seq.sequence_loss(training_logits, targets, masks, softmax_loss_function=loss) is now giving me "TypeError: 'Tensor' object is not callable." This is one of the most opaque errors I've seen Tensorflow produce and I haven't found much of anything in the way of explanation on the internet. Any help would be appreciated.

tensorflow RNN implementation

I'm building a RNN model to do the image classification. I used a pipeline to feed in the data. However it returns
ValueError: Variable rnn/rnn/basic_rnn_cell/weights already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:
I wonder what can I do to fix this since there are not many examples of implementing RNN with an input pipeline. I know it would work if I use the placeholder, but my data is already in the form of tensors. Unless I can feed the placeholder with tensors, I prefer just to use the pipeline.
def RNN(inputs):
with tf.variable_scope('cells', reuse=True):
basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=batch_size)
with tf.variable_scope('rnn'):
outputs, states = tf.nn.dynamic_rnn(basic_cell, inputs, dtype=tf.float32)
fc_drop = tf.nn.dropout(states, keep_prob)
logits = tf.contrib.layers.fully_connected(fc_drop, batch_size, activation_fn=None)
return logits
#Training
with tf.name_scope("cost_function") as scope:
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=train_label_batch, logits=RNN(train_batch)))
train_step = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(cost)
#Accuracy
with tf.name_scope("accuracy") as scope:
correct_prediction = tf.equal(tf.argmax(RNN(test_image), 1), tf.argmax(test_image_label, 0))
accuracy = tf.cast(correct_prediction, tf.float32)
You need to use the reuse option correctly. following changes would solve it. For prediction you need to use the already existed variables in the graph.
def RNN(inputs, reuse):
with tf.variable_scope('cells', reuse=reuse):
basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=batch_size, reuse=reuse)
...
...
#Training
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=train_label_batch, logits=RNN(train_batch, reuse=None)))
#Accuracy
...
correct_prediction = tf.equal(tf.argmax(RNN(test_image, reuse=True), 1), tf.argmax(test_image_label, 0))