tensorflow modify variables in py_func (and its grad func) - tensorflow

In tensorflow, we can define our own op and its gradient by:
https://gist.github.com/harpone/3453185b41d8d985356cbe5e57d67342
However, can we modify any variable in the computational graph in these python functions. For example in the "_MySquareGrad" function?
I assume we can get the variable by:
var = tf.get_variable('var')
and then do something to change its value and then assign it back?
e.g.
tmp = var*10
var.assign(tmp)
Thanks!
Also when we do var*10, do we have to convert it to numpy?
Background: I'm familiar with automatic differentiation, but new to Tensorflow and Python. So please point out any syntactic problem and let me know if my intention is clear.

You can modify the variables in the computational graph in these python functions. Your example code with tmp = var*10 will work and does not convert anything to numpy.
In fact you should try to avoid converting to numpy as much as possible since it will slow down the computation.
edit:
You can include your code to the gradient computation graph of the _MySquareGrad function doing this:
def _MySquareGrad(op, grad):
#first get a Variable that was created using tf.get_variable()
with tf.variable_scope("", reuse=True):
var = tf.get_variable('var')
#now create the assign graph:
tmp = var*10.
assign_op = var.assign(tmp)
#now make the assign operation part of the grad calculation graph:
with tf.control_dependencies([assign_op]):
x = tf.identity(op.inputs[0])
return grad * 20 * x
Here is a working example:
import tensorflow as tf
from tensorflow.python.framework import ops
import numpy as np
# Define custom py_func which takes also a grad op as argument:
def py_func(func, inp, Tout, stateful=True, name=None, grad=None):
# Need to generate a unique name to avoid duplicates:
rnd_name = 'PyFuncGrad' + str(np.random.randint(0, 1E+8))
tf.RegisterGradient(rnd_name)(grad) # see _MySquareGrad for grad example
g = tf.get_default_graph()
with g.gradient_override_map({"PyFunc": rnd_name}):
return tf.py_func(func, inp, Tout, stateful=stateful, name=name)
# Def custom square function using np.square instead of tf.square:
def mysquare(x, name=None):
with ops.name_scope(name, "Mysquare", [x]) as name:
sqr_x = py_func(np.square,
[x],
[tf.float32],
name=name,
grad=_MySquareGrad) # <-- here's the call to the gradient
return sqr_x[0]
### Actual gradient:
##def _MySquareGrad(op, grad):
##x = op.inputs[0]
##return grad * 20 * x # add a "small" error just to see the difference:
def _MySquareGrad(op, grad):
#first get a Variable that was created using tf.get_variable()
with tf.variable_scope("", reuse=True):
var = tf.get_variable('var')
#now create the assign graph:
tmp = var*10.
assign_op = var.assign(tmp)
#now make the assign operation part of the grad calculation graph:
with tf.control_dependencies([assign_op]):
x = tf.identity(op.inputs[0])
return grad * 20 * x
with tf.Session() as sess:
x = tf.constant([1., 2.])
var = tf.get_variable(name="var", shape=[], initializer=tf.constant_initializer(0.2))
y = mysquare(x)
tf.global_variables_initializer().run()
print(x.eval(), y.eval(), tf.gradients(y, x)[0].eval())
print("Now var is 10 times larger:", var.eval())

Related

How can I print output (tensor values, shapes) in gpflow?

I am trying to develop a new model within gpflow. In order to debug it I need to know shapes and values of tensors during execution of the graph.
I tried the below based on printing tensor values in tensorflow, but nothing is printed to the console.
import numpy as np
import sys
import gpflow
from gpflow.mean_functions import MeanFunction
from gpflow.decors import params_as_tensors
class Log(MeanFunction):
"""
:math:`y_i = \log(x_i)`
"""
def __init__(self):
MeanFunction.__init__(self)
#params_as_tensors
def __call__(self, X):
# I want to figure out the shape of X here
tf.print(tf.shape(X), output_stream=sys.stdout)
# Returns the natural logarithm of the input
return tf.log(X)
# Test gpflow implementation
sess = tf.InteractiveSession()
with sess.as_default(), sess.graph.as_default():
X = np.random.uniform(size=[100, 1])
y = np.random.uniform(size=[100, 1])
m = gpflow.models.GPR(X=X, Y=y, mean_function=Log(), kern=gpflow.kernels.RBF(input_dim=1))
You're on the right track. According to the TensorFlow docs [1], you need to wrap tf.print() in a tf.control_dependencies() context manager to make sure it's run, when in graph model. GPflow currently works in graph model. GPflow 2.0, which is indevelopment, will allow usage in eager mode.
#params_as_tensors
def __call__(self, X):
# I want to figure out the shape of X here
print_op = tf.print(tf.shape(X), output_stream=sys.stdout)
with tf.control_dependencies([print_op]):
log_calc = tf.log(X)
# Returns the natural logarithm of the input
return log_calc
[1] https://www.tensorflow.org/api_docs/python/tf/print

How to apply a computed loss to a graph?

I am new to tensorflow and trying to code a toy discriminator problem. The way I have it set up, the loss is calculated from the expert_actions and the novice_actions. However, I am running an error when I am trying to optimize using the computed loss. The error is ValueError: No variables to optimize. I do understand that I am getting the error because there is no feed_dict. However, I do not know the solution to this.
class discriminator:
def __init__(self,n_actions, learning_rate):
self.n_actions = n_actions
self.learning_rate_dist = learning_rate
self.graph = tf.Graph()
with self.graph.as_default():
self.dis_input = tf.placeholder(tf.float32, [None, self.n_actions])
self.discriminator_function()
init = tf.global_variables_initializer()
self.sess = tf.Session(graph=self.graph)
self.sess.run(init)
def discriminator_function(self, hidden = None):
if hidden == None:
hidden = 16
x = tf.layers.dense(self.dis_input,hidden,tf.nn.relu)
x = tf.layers.dense(x,hidden,tf.nn.relu)
self.dis_output = tf.layers.dense(x,1)
def discriminator(self,expert_actions,novice_actions):
expert_out = self.sess.run(self.dis_output,feed_dict={self.dis_input : expert_actions})
novice_out = self.sess.run(self.dis_output,feed_dict={self.dis_input : novice_actions})
loss = tf.reduce_mean(tf.log(expert_out) + tf.log(1.-novice_out))
# update discriminator loss
optimize = tf.train.AdamOptimizer(self.learning_rate_dis).minimize(-loss)
self.sess.run(optimize) #error over here
return loss
if __name__ == '__main__':
d = discriminator(2,0.001)
expert_actions = np.random.randint(2, size=10)
novice_actions = np.random.randint(2, size=10)
d.discriminator(expert_actions,novice_actions)
You are trying to optimize loss = tf.reduce_mean(tf.log(expert_out) + tf.log(1.-novice_out)) with expert_out and novice_out being numpy arrays. There are no variables between the input and loss, to compute gradients.
Your discriminator function should be something like this:
def discriminator(self,expert_actions,novice_actions):
#Make sure you add the new ops and variables to the graph defined.
with self.graph.as_default():
loss = tf.reduce_mean(tf.log('Should be a tensor that is part of the graph and not a numpy array))
optimize = tf.train.AdamOptimizer(0.01).minimize(-loss)
self.sess.run(tf.global_variables_initializer())
#pass the inputs here
loss = self.sess.run([loss, optimize], feed_dict={self.dis_input : expert_actions})
return loss

TensorFlow: how to do python function with custom gradients without eval?

I am trying to write some custom TensorFlow functions in python (using tf.py_func) where I want to calculate both the results and the gradients in python. I'm using the gradient_override_map trick (for example from from https://gist.github.com/harpone/3453185b41d8d985356cbe5e57d67342 and How to make a custom activation function with only Python in Tensorflow?).
However, while the function in the forward direction gets a numpy array as an input, the function for the gradient gets Tensors. This is a problem, depending on when the function gets called, because there may not be a default session, and/or there may not be a feed_dict with all the required values yet (for example, in a tf.train optimizer).
How do I do a py_func where both the forward and backward functions get (and return) numpy arrays?
Sample code:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
def sin_func(x):
return np.sin(x)
def sin_grad_func(op, grad):
x = op.inputs[0].eval()
grad = grad.eval() # <--- this is what I'd like to avoid
output_grad = np.cos(x) * grad
return tf.convert_to_tensor(output_grad)
def py_func(func, inp, Tout, stateful=True, name=None, grad_func=None):
grad_name = 'PyFuncGrad_' + str(np.random.randint(0, 1E+8))
tf.RegisterGradient(grad_name)(grad_func)
g = tf.get_default_graph()
with g.gradient_override_map({"PyFunc": grad_name}):
return tf.py_func(func, inp, Tout, stateful=stateful, name=name)
with tf.Session() as sess:
np_x = np.linspace(0, np.pi, num=1000, dtype=np.float32)
x = tf.constant(np_x)
y = py_func(sin_func,
[x],
[tf.float32],
name='np_sin',
grad_func=sin_grad_func)
y = y[0]
gr = tf.gradients(y, [x])
tf.global_variables_initializer().run()
plt.plot(y.eval())
plt.plot(gr[0].eval())
If you want to include arbitrary Python code in your gradient function, the easiest solution is to create another tf.py_func() inside sin_grad_func():
def sin_grad_func_impl(x, grad):
return np.cos(x) * grad
def sin_grad_func(op, grad):
return tf.py_func(sin_grad_func_impl, [x, grad], grad.dtype)

Simple custom gradient with gradient_override_map

I want to use a function that creates weights for a normal dense layer, it basically behaves like an initialization function, only that it "initializes" before every new forward pass.
The flow for my augmented linear layer looks like this:
input = (x, W)
W_new = g(x,W)
output = tf.matmul(x,W_new)
However, g(x,W) is not differentiable, as it involves some sampling. Luckily it also doesn't have any parameters I want to learn so I just try to do the forward and backward pass, as if I would have never replaced W.
Now I need to tell the automatic differentiation to not backpropagate through g(). I do this with:
W_new = tf.stop_gradient(g(x,W))
Unfortunately this does not work, as it complains about non-matching shapes.
What does work is the following:
input = (x, W)
W_new = W + tf.stop_gradient(g(x,W) - W)
output = tf.matmul(x,W_new)
as suggested here: https://stackoverflow.com/a/36480182
Now the forward pass seems to be OK, but I don't know how to override the gradient for the backward pass. I know, that I have to use: gradient_override_map for this, but could not transfer applications I have seen to my particular usecase (I am still quite new to TF).
However, I am not sure how to do this and if there isn't an easier way. I assume something similar has to be done in the first forward pass in a given model, where all weights are initialized while we don't have to backpropagate through the init functions as well.
Any help would be very much appreciated!
Hey #jhj I too faced the same problem fortunately I found this gist. Hope this helps :)
Sample working -
import tensorflow as tf
from tensorflow.python.framework import ops
import numpy as np
Define custom py_func which takes also a grad op as argument:
def py_func(func, inp, Tout, stateful=True, name=None, grad=None):
# Need to generate a unique name to avoid duplicates:
rnd_name = 'PyFuncGrad' + str(np.random.randint(0, 1E+8))
tf.RegisterGradient(rnd_name)(grad) # see _MySquareGrad for grad example
g = tf.get_default_graph()
with g.gradient_override_map({"PyFunc": rnd_name, "PyFuncStateless": rnd_name}):
return tf.py_func(func, inp, Tout, stateful=stateful, name=name)
Def custom square function using np.square instead of tf.square:
def mysquare(x, name=None):
with ops.name_scope(name, "Mysquare", [x]) as name:
sqr_x = py_func(np.square,
[x],
[tf.float32],
name=name,
grad=_MySquareGrad) # <-- here's the call to the gradient
return sqr_x[0]
Actual gradient:
def _MySquareGrad(op, grad):
x = op.inputs[0]
return grad * 20 * x # add a "small" error just to see the difference:
with tf.Session() as sess:
x = tf.constant([1., 2.])
y = mysquare(x)
tf.global_variables_initializer().run()
print(x.eval(), y.eval(), tf.gradients(y, x)[0].eval())

Understanding Variable scope example in Tensorflow

I was looking at the mechanics section for Tensorflow, specifically on shared variables. In the section "The problem", they are dealing with a convolutional neural net, and provide the following code (which runs an image through the model):
# First call creates one set of variables.
result1 = my_image_filter(image1)
# Another set is created in the second call.
result2 = my_image_filter(image2)
If the model was implemented in such a way, would it then be impossible to learn/update the parameters because there's a new set of parameters for each image in my training set?
Edit:
I've also tried "the problem" approach on a simple linear regression example, and there do not appear to be any issues with this method of implementation. Training seems to work as well as can be shown by the last line of the code. So I'm wondering if there is a subtle discrepancy in the tensorflow documentation and what I'm doing. :
import tensorflow as tf
import numpy as np
trX = np.linspace(-1, 1, 101)
trY = 2 * trX + np.random.randn(*trX.shape) * 0.33 # create a y value which is approximately linear but with some random noise
X = tf.placeholder("float") # create symbolic variables
Y = tf.placeholder("float")
def model(X):
with tf.variable_scope("param"):
w = tf.Variable(0.0, name="weights") # create a shared variable (like theano.shared) for the weight matrix
return tf.mul(X, w) # lr is just X*w so this model line is pretty simple
y_model = model(X)
cost = (tf.pow(Y-y_model, 2)) # use sqr error for cost function
train_op = tf.train.GradientDescentOptimizer(0.01).minimize(cost) # construct an optimizer to minimize cost and fit line to my data
sess = tf.Session()
init = tf.initialize_all_variables() # you need to initialize variables (in this case just variable W)
sess.run(init)
with tf.variable_scope("train"):
for i in range(100):
for (x, y) in zip(trX, trY):
sess.run(train_op, feed_dict={X: x, Y: y})
print sess.run(y_model, feed_dict={X: np.array([1,2,3])})
One has to create the variable set only once per whole training (and testing) set. The goal of variable scopes is to allow for modularization of subsets of parameters, such as those belonging to layers (e.g. when architecture of a layer is repeated, the same names can be used within each layer scope).
In your example you create parameters only in the model function. You can print out your variable names to see that it is assigned to the specified scope:
from __future__ import print_function
X = tf.placeholder("float") # create symbolic variables
Y = tf.placeholder("float")
print("X:", X.name)
print("Y:", Y.name)
def model(X):
with tf.variable_scope("param"):
w = tf.Variable(0.0, name="weights") # create a shared variable (like theano.shared) for the weight matrix
print("w:", w.name)
return tf.mul(X, w)
The call to sess.run(train_op, feed_dict={X: x, Y: y}) only evaluates the value of train_op given the provided values of X and Y. No new variables (incl. parameters) are created there; therefore, it has no effect. You can make sure the variable names stay the same by again printing them out:
with tf.variable_scope("train"):
print("X:", X.name)
print("Y:", Y.name)
for i in range(100):
for (x, y) in zip(trX, trY):
sess.run(train_op, feed_dict={X: x, Y: y})
You will see that variable names stay the same, as they are already initialized.
If you'd like to retrieve a variable using its scope, you need to use get_variable within a tf.variable_scope enclosure:
with tf.variable_scope("param"):
w = tf.get_variable("weights", [1])
print("w:", w.name)