I'm trying to write a custom activation function using tf.custom_gradient. Specifically, I want to use the Taylor expansion of 1/x for x < 1 and 1/x itself otherwise. Here's my code:
@tf.custom_gradient
def taylor_inverse(x):
    """Return ``taylor(x)`` when ``x < 1`` and ``1/x`` otherwise, plus a gradient fn.

    NOTE: this is the failing version from the question. ``tf.cond`` is given
    already-evaluated tensors for its two branches instead of callables, which
    is what raises ``TypeError: 'Tensor' object is not callable``.
    """

    def func(x):
        return tf.cond(x < 1, taylor(x), tf.math.reciprocal(x))

    def grad(upstream):
        return tf.cond(upstream < 1, taylor_grad(upstream), inv_diff(upstream))

    return func(x), grad
@tf.function
def taylor(x):
    """Evaluate 4 - 6x + 4x^2 - x^3, the cubic Taylor expansion of 1/x about x = 1."""
    return 4 - 6 * x + 4 * x ** 2 - x ** 3
@tf.function
def taylor_grad(x):
    """Evaluate -3x^2 + 8x - 6, the derivative of 4 - 6x + 4x^2 - x^3."""
    return -3 * x ** 2 + 8 * x - 6
@tf.function
def inv_diff(x):
    """Evaluate -(1/x)^2, the derivative of 1/x."""
    # ``**`` binds tighter than unary minus, so this is -(reciprocal(x) ** 2).
    return -tf.math.reciprocal(x) ** 2
I get the error message:
TypeError: 'Tensor' object is not callable
The polynomial is $-x^3 + 4x^2 - 6x + 4$ and its gradient is $-3x^2 + 8x - 6$. I get the error on this line:
# Wrap the custom activation in a Lambda layer, then apply it to `layer`.
layer_inverse = Lambda(
    lambda x: taylor_inverse(x),
    output_shape=(1,),
)(layer)
Thank you for your help
The second and third arguments of tf.cond must be callables (functions), not tensors. So use it like this:
@tf.custom_gradient
def taylor_inverse(x):
    """Return ``taylor(x)`` when ``x < 1`` and ``1/x`` otherwise, with a custom gradient.

    Returns the ``(value, grad_fn)`` pair that ``tf.custom_gradient`` expects.
    ``tf.cond`` needs a scalar predicate, so this form is for scalar ``x``.
    """

    def func(x):
        # tf.cond takes zero-argument callables for its branches, hence the lambdas.
        return tf.cond(x < 1, lambda: taylor(x), lambda: tf.math.reciprocal(x))

    def grad(upstream):
        # Chain rule: the local derivative is evaluated at the forward input
        # ``x`` (not at ``upstream``) and then scaled by the incoming gradient.
        local = tf.cond(x < 1, lambda: taylor_grad(x), lambda: inv_diff(x))
        return upstream * local

    return func(x), grad
Related
I want to feed pytorch gradients manually. In my real problem, I have my own adjoint function that does not use tensors. Is there any way I can define my own gradient function for pytorch to use during optimization?
import numpy as np
import torch
# define rosenbrock function and gradient
x0 = np.array([0.1, 0.1])
a = 1
b = 5


def f(x):
    """Return the Rosenbrock value (a - x[0])**2 + b * (x[1] - x[0]**2)**2."""
    return (a - x[0]) ** 2 + b * (x[1] - x[0] ** 2) ** 2


def jac(x):
    """Return the analytic gradient of ``f`` at ``x`` as a two-element array."""
    dx1 = -2 * a + 4 * b * x[0] ** 3 - 4 * b * x[0] * x[1] + 2 * x[0]
    dx2 = 2 * b * (x[1] - x[0] ** 2)
    return np.array([dx1, dx2])
# create stochastic rosenbrock function and gradient
# (the crude analogy is that I have predefined stochastic
# forward and backward functions)
def f_rand(x):
    """Return ``f(x)`` scaled by a random factor drawn uniformly from [0.5, 1.5)."""
    return f(x) * np.random.uniform(0.5, 1.5)


def jac_rand(x):
    """Return ``jac(x)`` scaled by a random factor drawn uniformly from [0.5, 1.5)."""
    return jac(x) * np.random.uniform(0.5, 1.5)
# requires_grad must be True, otherwise loss.backward() has nothing to
# differentiate with respect to and x_tensor.grad is never populated.
x_tensor = torch.tensor(x0, requires_grad=True)
optimizer = torch.optim.Adam([x_tensor], lr=0.1)


# here, closure is fed f_rand to compute the gradient.
# I need to feed closure the gradient directly from jac_rand
def closure():
    """Zero the gradients, evaluate the stochastic loss and backpropagate it."""
    optimizer.zero_grad()
    loss = f_rand(x_tensor)
    loss.backward()  # jac_rand(x)
    return loss


for ii in range(200):
    optimizer.step(closure)

print(x_tensor, f(x_tensor))
# tensor([1.0000, 1.0000], dtype=torch.float64, requires_grad=True) tensor(4.5799e-09, dtype=torch.float64, grad_fn=<AddBackward0>)
# ( this is the right answer, E[f(1, 1)] = 0 )
I've tried defining a custom function, but I can't get it to work. This is my best attempt so far:
import numpy as np
import torch
# define rosenbrock function and gradient
x0 = np.array([0.1, 0.1])
a = 1
b = 5


def f(x):
    """Evaluate the Rosenbrock function at the two-component point ``x``."""
    return (a - x[0]) ** 2 + b * (x[1] - x[0] ** 2) ** 2


def jac(x):
    """Evaluate the gradient of ``f`` at ``x``; returns ``np.array([df/dx0, df/dx1])``."""
    dx1 = -2 * a + 4 * b * x[0] ** 3 - 4 * b * x[0] * x[1] + 2 * x[0]
    dx2 = 2 * b * (x[1] - x[0] ** 2)
    return np.array([dx1, dx2])
# create stochastic rosenbrock function and gradient
def f_rand(x):
    """Return ``f(x)`` multiplied by a uniform random factor in [0.5, 1.5)."""
    return f(x) * np.random.uniform(0.5, 1.5)


def jac_rand(x):
    """Return ``jac(x)`` multiplied by a uniform random factor in [0.5, 1.5)."""
    return jac(x) * np.random.uniform(0.5, 1.5)
class custom_function(torch.autograd.Function):
    """Autograd function whose forward is ``f_rand`` and whose backward uses ``jac_rand``."""

    @staticmethod
    def forward(ctx, input):
        # Keep the input so backward can evaluate the Jacobian at the same point.
        ctx.save_for_backward(input)
        return f_rand(input)

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        # The stochastic Jacobian helper is named jac_rand; g_rand does not exist.
        return grad_output * jac_rand(input)
x_tensor = torch.tensor(x0, requires_grad=False)
optimizer = torch.optim.Adam([x_tensor], lr=0.1)

for ii in range(200):
    print('x_tensor ', x_tensor)
    # NOTE(review): this passes an *instance* of the Function as the closure;
    # presumably calling that instance (rather than using .apply) is what
    # triggers the legacy-autograd RuntimeError quoted below — confirm.
    optimizer.step(custom_function())

print(x_tensor, f(x_tensor))
It says:
RuntimeError: Legacy autograd function with non-static forward method is deprecated. Please use new-style autograd function with static forward method. (Example: https://pytorch.org/docs/stable/autograd.html#torch.autograd.Function)
Not quite sure if this is exactly what you want but the method call loss.backward() computes gradients via pytorch's computational graph and stores the gradient values in the weight tensors themselves (in your case it's in x_tensor). And these gradients can be accessed via x_tensor.grad. However, if you don't want to use pytorch's gradient computing method using loss.backward(), then you can manually feed your gradients into your tensor's .grad attribute as follows:
def closure():
    """Evaluate the stochastic loss and store a hand-computed gradient in ``x_tensor.grad``.

    Returns the loss so it can be passed straight to ``optimizer.step(closure)``.
    """
    optimizer.zero_grad()
    # no_grad must be active when the closure *runs*; wrapping the ``def``
    # statement in it has no effect at call time.
    with torch.no_grad():
        loss = f_rand(x_tensor)
        # jac_rand is a NumPy routine, so hand it a NumPy array and convert back.
        x_tensor.grad = torch.from_numpy(jac_rand(x_tensor.detach().numpy()))
    return loss
I made some modifications, mainly the learning rate and the number of iterations. You will see the loss goes to zero as the tensor approaches (a, a²).
import torch
import numpy as np
import torch
# define rosenbrock function and gradient
np.random.seed(0)  # fixed seed so the stochastic factors are reproducible
x0 = np.array([0.1, 0.1])
a = 6
b = 100


def f(x):
    """Return the Rosenbrock value at ``x``; it is zero at ``(a, a**2)``."""
    return (a - x[0]) ** 2 + b * (x[1] - x[0] ** 2) ** 2


def jac(x):
    """Return the analytic gradient of ``f`` at ``x`` as a two-element array."""
    dx1 = -2 * a + 4 * b * x[0] ** 3 - 4 * b * x[0] * x[1] + 2 * x[0]
    dx2 = 2 * b * (x[1] - x[0] ** 2)
    return np.array([dx1, dx2])
# create stochastic rosenbrock function and gradient
def f_rand(x):
    """Return ``f(x)`` scaled by a uniform random factor in [0.5, 1.5)."""
    # return f(x)  # deterministic variant, handy for debugging
    return f(x) * np.random.uniform(0.5, 1.5)


def jac_rand(x):
    """Return ``jac(x)`` scaled by a uniform random factor in [0.5, 1.5)."""
    # return jac(x)  # deterministic variant, handy for debugging
    return jac(x) * np.random.uniform(0.5, 1.5)
class CustomFunction(torch.autograd.Function):
    """Autograd function with a user-supplied forward (``f_rand``) and gradient (``jac_rand``).

    Use it through ``CustomFunction.apply``; both methods must be static.
    """

    @staticmethod
    def forward(ctx, input):
        # Keep the input so backward can evaluate the Jacobian at the same point.
        ctx.save_for_backward(input)
        return f_rand(input)

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        # jac_rand is a NumPy routine: give it a NumPy array and convert the
        # result back so that backward always returns a torch tensor.
        local_grad = torch.as_tensor(
            jac_rand(input.detach().cpu().numpy()),
            dtype=input.dtype,
            device=input.device,
        )
        return grad_output * local_grad
# New-style autograd functions are invoked through .apply, never instantiated.
custom_function = CustomFunction.apply
x_tensor = torch.tensor(x0, requires_grad=True)
optimizer = torch.optim.Adam([x_tensor], lr=0.0001)
print('x_tensor ', x_tensor)

for ii in range(1000000):
    optimizer.zero_grad()
    output = custom_function(x_tensor)
    loss = round(output.item(), 8)
    print('loss: ', loss)
    # Stop once the (rounded) loss is effectively zero.
    if loss < 0.0000001:
        break
    output.backward()
    optimizer.step()

print(x_tensor, f(x_tensor))
I have a problem where I need to modify a variable inside a Tensorflow function. Then I need
to convert this function to a tensorflow graph.
The problem is that the size of the variable is not fixed. For example, it can be a tensor of shape (3,) or (2,). This is why the function takes the variable as a parameter, so that it can modify it and return it.
Here is an example of a class that contains a function call, this function takes two arguments (x,v).
x is a tf.Tensor and v is a tf.Variable. v is assigned the product x*v.
import tensorflow as tf
class MyModule(tf.Module):
    """Module whose call multiplies ``v`` by ``x`` in place and returns ``v``.

    NOTE: this is the failing version from the question. ``v`` is declared
    with a ``tf.TensorSpec``, and in graph mode the call fails with
    ``AttributeError: 'Tensor' object has no attribute 'assign'``.
    """

    def __init__(self):
        super().__init__()

    @tf.function(input_signature=[
        tf.TensorSpec(shape=[None], dtype=tf.int32),
        tf.TensorSpec(shape=[None], dtype=tf.int32),
    ])
    def __call__(self, x, v):
        v.assign(x*v, read_value=False)
        return v
# Make sure tf.function-decorated callables are not forced to run eagerly.
tf.config.run_functions_eagerly(False)

module = MyModule()
x = tf.constant([10, 10])
v = tf.Variable(
    2 * tf.ones_like(x),
    trainable=False,
)
module(x, v)
This works as expected in eager mode, but in graph mode I get the following error:
AttributeError: 'Tensor' object has no attribute 'assign'
I know that it is because of the signature of tf.Variable.
My question is how can I specify the signature of tf.Variable given that the current one produces an error?
Actually, there is an operation that can achieve what you want; however, it is not part of the public API, so be aware that relying on it may not be best practice.
You need resource_variable_ops which you can find under tensorflow.python.ops.
import tensorflow as tf
from tensorflow.python.ops import resource_variable_ops
class MyModule(tf.Module):
    """Module whose call multiplies the variable ``v`` by ``x`` in place and returns it."""

    def __init__(self):
        super().__init__()

    # The second argument is declared with a VariableSpec rather than a
    # TensorSpec. VariableSpec comes from a non-public TensorFlow module.
    @tf.function(input_signature=[
        tf.TensorSpec(shape=[None], dtype=tf.int32),
        resource_variable_ops.VariableSpec(shape=[None], dtype=tf.int32),
    ])
    def __call__(self, x, v):
        v.assign(x*v, read_value=False)
        return v
# Build the module, then call it with a constant and an equally shaped variable.
module = MyModule()
x = tf.constant([10, 10])
v = tf.Variable(
    2 * tf.ones_like(x),
    trainable=False,
)
module(x, v)
Here is the solution I have found:
class MyModule(tf.Module):
    """Module that owns a rank-2 int32 variable and overwrites it with ``x * v`` on each call."""

    def __init__(self):
        super().__init__()
        # Created empty with both dimensions unspecified (None), so differently
        # sized values can be assigned later.
        self.v = tf.Variable([[]], shape=tf.TensorShape([None]*2), dtype=tf.int32)

    @tf.function(input_signature=[
        tf.TensorSpec(shape=[None]*2, dtype=tf.int32),
        tf.TensorSpec(shape=[None]*2, dtype=tf.int32),
    ])
    def __call__(self, x, v):
        self.v.assign(x * v, read_value=False)
        return self.v
Defining tf.Variable inside the constructor of the model with empty values solved the problem.
you can complete it with a simple method.
[ Sample ]:
import tensorflow as tf
class MyModule(tf.Module):
    """Module that stores a caller-supplied variable and overwrites it with ``x * v`` on each call."""

    def __init__(self, v):
        super().__init__()
        self.v = v

    @tf.function(input_signature=[
        tf.TensorSpec(shape=None, dtype=tf.int32),
        tf.TensorSpec(shape=None, dtype=tf.int32),
    ])
    def __call__(self, x, v):
        self.v.assign(x * v, read_value=False)
        return self.v
# Run the same module definition against a 2x1 and then a 3x1 variable.
for initial in ([[1], [2]], [[1], [2], [3]]):
    x = tf.constant(
        tf.random.uniform(shape=[len(initial), 1], maxval=3, dtype=tf.int32)
    )
    v = tf.Variable(initial)
    module = MyModule(v)
    print(module(x, v))
[ Output ]:
tf.Tensor(
[[1]
[0]], shape=(2, 1), dtype=int32)
tf.Tensor(
[[2]
[0]
[6]], shape=(3, 1), dtype=int32)
I'm practicing using TensorFlow's custom_gradient decorator and I tried to define a simple ReLU. One would think it would be as simple as defining the gradient to be 1 when x > 0 and 0 otherwise. However, the following code does not yield the same gradients as a ReLU:
@tf.custom_gradient
def relu(x):
    """ReLU forward pass with a hand-written gradient (the version under question).

    NOTE: ``grad`` returns a constant 1 or 0 shaped like ``dy`` and never
    multiplies by ``dy`` itself. The reshape to ``[]`` means ``x`` must hold
    exactly one element.
    """

    def grad(dy):
        return tf.cond(tf.reshape(x, []) > 0,
                       lambda: tf.cast(tf.reshape(1, dy.shape), tf.float32),
                       lambda: tf.cast(tf.reshape(0, dy.shape), tf.float32))

    return tf.nn.relu(x), grad
Can someone explain to me why this standard definition of ReLU's gradient does not yield the same performance as:
@tf.custom_gradient
def relu(x):
    """ReLU forward pass whose custom gradient passes ``dy`` through unchanged."""

    def grad(dy):
        return dy

    return tf.nn.relu(x), grad
I am trying to write some custom TensorFlow functions in Python (using tf.py_func) where I want to calculate both the results and the gradients in Python. I'm using the gradient_override_map trick (for example from https://gist.github.com/harpone/3453185b41d8d985356cbe5e57d67342 and "How to make a custom activation function with only Python in Tensorflow?").
However, while the function in the forward direction gets a numpy array as an input, the function for the gradient gets Tensors. This is a problem, depending on when the function gets called, because there may not be a default session, and/or there may not be a feed_dict with all the required values yet (for example, in a tf.train optimizer).
How do I do a py_func where both the forward and backward functions get (and return) numpy arrays?
Sample code:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
def sin_func(x):
    """Return the element-wise sine of ``x`` computed with NumPy."""
    return np.sin(x)
def sin_grad_func(op, grad):
    """Gradient of ``sin_func``: ``cos(x) * grad``, computed in NumPy via ``.eval()``.

    NOTE: this is the version from the question. The ``.eval()`` calls on
    ``op.inputs[0]`` and ``grad`` are what the question wants to avoid.
    """
    x = op.inputs[0].eval()
    grad = grad.eval()  # <--- this is what I'd like to avoid
    output_grad = np.cos(x) * grad
    return tf.convert_to_tensor(output_grad)
def py_func(func, inp, Tout, stateful=True, name=None, grad_func=None):
    """Wrap ``tf.py_func`` so that ``grad_func`` is used as the op's gradient.

    ``grad_func`` is registered under a randomly generated name and mapped
    onto the "PyFunc" op type while the op is created.
    """
    # Integer bound: randint expects integer limits, not the float 1E+8.
    grad_name = 'PyFuncGrad_' + str(np.random.randint(0, int(1E+8)))
    tf.RegisterGradient(grad_name)(grad_func)
    g = tf.get_default_graph()
    with g.gradient_override_map({"PyFunc": grad_name}):
        return tf.py_func(func, inp, Tout, stateful=stateful, name=name)
with tf.Session() as sess:
    np_x = np.linspace(0, np.pi, num=1000, dtype=np.float32)
    x = tf.constant(np_x)
    y = py_func(sin_func,
                [x],
                [tf.float32],
                name='np_sin',
                grad_func=sin_grad_func)
    # py_func returns a list of outputs; keep the single tensor.
    y = y[0]
    gr = tf.gradients(y, [x])
    tf.global_variables_initializer().run()
    # The .eval() calls rely on the session above being the default one,
    # so the plotting stays inside the ``with`` block.
    plt.plot(y.eval())
    plt.plot(gr[0].eval())
If you want to include arbitrary Python code in your gradient function, the easiest solution is to create another tf.py_func() inside sin_grad_func():
def sin_grad_func_impl(x, grad):
    """Return ``cos(x) * grad``, the gradient of sine scaled by the incoming gradient."""
    return np.cos(x) * grad
def sin_grad_func(op, grad):
    """Gradient function for the sine op, delegating to NumPy through a nested ``tf.py_func``."""
    # The forward input must be taken from the op; ``x`` is not otherwise
    # defined inside this function.
    x = op.inputs[0]
    return tf.py_func(sin_grad_func_impl, [x, grad], grad.dtype)
In tensorflow, we can define our own op and its gradient by:
https://gist.github.com/harpone/3453185b41d8d985356cbe5e57d67342
However, can we modify any variable in the computational graph in these python functions. For example in the "_MySquareGrad" function?
I assume we can get the variable by:
# NOTE(review): presumably meant to fetch an existing variable named 'var';
# confirm that variable reuse is enabled in the enclosing scope.
var = tf.get_variable('var')
and then do something to change its value and then assign it back?
e.g.
# Scale the variable's value by 10, then write the result back into it.
tmp = var*10
# NOTE(review): in graph mode this presumably only creates the assign op,
# which still has to be run — confirm.
var.assign(tmp)
Thanks!
Also when we do var*10, do we have to convert it to numpy?
Background: I'm familiar with automatic differentiation, but new to Tensorflow and Python. So please point out any syntactic problem and let me know if my intention is clear.
You can modify the variables in the computational graph in these python functions. Your example code with tmp = var*10 will work and does not convert anything to numpy.
In fact you should try to avoid converting to numpy as much as possible since it will slow down the computation.
edit:
You can include your code to the gradient computation graph of the _MySquareGrad function doing this:
def _MySquareGrad(op, grad):
    """Gradient for the custom square op that also multiplies variable 'var' by 10.

    Returns ``grad * 20 * x`` where ``x`` is the op's first input.
    """
    # first get a Variable that was created using tf.get_variable()
    with tf.variable_scope("", reuse=True):
        var = tf.get_variable('var')
    # now create the assign graph:
    tmp = var*10.
    assign_op = var.assign(tmp)
    # now make the assign operation part of the grad calculation graph:
    with tf.control_dependencies([assign_op]):
        # tf.identity creates an op inside the control-dependency scope, so
        # evaluating the gradient also runs the assignment.
        x = tf.identity(op.inputs[0])
    return grad * 20 * x
Here is a working example:
import tensorflow as tf
from tensorflow.python.framework import ops
import numpy as np
# Define custom py_func which takes also a grad op as argument:
def py_func(func, inp, Tout, stateful=True, name=None, grad=None):
    """Wrap ``tf.py_func`` so that ``grad`` is used as the gradient of the created op."""
    # Need to generate a unique name to avoid duplicates
    # (integer bound: randint expects integer limits, not the float 1E+8):
    rnd_name = 'PyFuncGrad' + str(np.random.randint(0, int(1E+8)))
    tf.RegisterGradient(rnd_name)(grad)  # see _MySquareGrad for grad example
    g = tf.get_default_graph()
    with g.gradient_override_map({"PyFunc": rnd_name}):
        return tf.py_func(func, inp, Tout, stateful=stateful, name=name)
# Def custom square function using np.square instead of tf.square:
def mysquare(x, name=None):
    """Square ``x`` with ``np.square`` through ``py_func``, using ``_MySquareGrad`` as its gradient."""
    with ops.name_scope(name, "Mysquare", [x]) as name:
        sqr_x = py_func(np.square,
                        [x],
                        [tf.float32],
                        name=name,
                        grad=_MySquareGrad)  # <-- here's the call to the gradient
        # py_func returns a list of outputs; return the single tensor.
        return sqr_x[0]
### Actual gradient:
##def _MySquareGrad(op, grad):
##x = op.inputs[0]
##return grad * 20 * x # add a "small" error just to see the difference:
def _MySquareGrad(op, grad):
    """Gradient for ``mysquare`` that, as a side effect, multiplies variable 'var' by 10.

    Returns ``grad * 20 * x`` where ``x`` is the op's first input.
    """
    # first get a Variable that was created using tf.get_variable()
    with tf.variable_scope("", reuse=True):
        var = tf.get_variable('var')
    # now create the assign graph:
    tmp = var*10.
    assign_op = var.assign(tmp)
    # now make the assign operation part of the grad calculation graph:
    with tf.control_dependencies([assign_op]):
        # tf.identity creates an op inside the control-dependency scope, so
        # evaluating the gradient also runs the assignment.
        x = tf.identity(op.inputs[0])
    return grad * 20 * x
with tf.Session() as sess:
    x = tf.constant([1., 2.])
    # Create the variable that _MySquareGrad looks up by name.
    var = tf.get_variable(name="var", shape=[], initializer=tf.constant_initializer(0.2))
    y = mysquare(x)
    tf.global_variables_initializer().run()
    # Evaluating the gradient also runs the assign op built into it.
    print(x.eval(), y.eval(), tf.gradients(y, x)[0].eval())
    print("Now var is 10 times larger:", var.eval())