Although I have gone through the pages regarding the same question: What is the difference between variable_scope and name_scope? and What is the difference between variable_ops_scope and variable_scope?.
I still cannot fully understand their differences. I have tried to use tf.variable_scope and tf.name_scope for the same code, I found they have the same graph by TensorBoard.
Other people have discussed their main differences with the generated name in the Graph, while is their name so important? I also saw that the variable with the same name would be reused. What is the reuse occasion?
The key is to understand the difference between variables and other tensors in the graph. Any newly created tensors will gain a prefix from a name scope. tf.get_variable will look for existing variables without the name scope modifier. Newly created variables with tf.get_variable will still get their name's augmented.
The script below highlights these differences. The intention is to reproduce the simple function by refactoring the tf.matmul(x, A) + b line and variable creation into a separate function add_layer.
import tensorflow as tf
def get_x():
return tf.constant([[1., 2., 3.]], dtype=tf.float32)
def report(out1, out2):
print(out1.name)
print(out2.name)
variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
print([v.name for v in variables])
def simple():
A = tf.get_variable(shape=(3, 3), dtype=tf.float32, name='A')
b = tf.get_variable(shape=(3,), dtype=tf.float32, name='b')
x = get_x()
out1 = tf.matmul(x, A) + b
out2 = tf.matmul(out1, A) + b
return out1, out2
def add_layer(x):
A = tf.get_variable(shape=(3, 3), dtype=tf.float32, name='A')
b = tf.get_variable(shape=(3,), dtype=tf.float32, name='b')
return tf.matmul(x, A) + b
def no_scoping():
x = get_x()
out1 = add_layer(x)
out2 = add_layer(out1)
return out1, out2
def different_name_scopes():
x = get_x()
with tf.name_scope('first_layer'):
out1 = add_layer(x)
with tf.name_scope('second_layer'):
out2 = add_layer(out1)
return out1, out2
def same_name_scope():
x = get_x()
with tf.name_scope('first_layer'):
out1 = add_layer(x)
with tf.name_scope('first_layer'):
out2 = add_layer(out1)
return out1, out2
def different_variable_scopes():
x = get_x()
with tf.variable_scope('first_layer'):
out1 = add_layer(x)
with tf.variable_scope('second_layer'):
out2 = add_layer(out1)
return out1, out2
def same_variable_scope():
x = get_x()
with tf.variable_scope('first_layer'):
out1 = add_layer(x)
with tf.variable_scope('first_layer'):
out2 = add_layer(out1)
return out1, out2
def same_variable_scope_reuse():
x = get_x()
with tf.variable_scope('first_layer'):
out1 = add_layer(x)
with tf.variable_scope('first_layer', reuse=True):
out2 = add_layer(out1)
return out1, out2
def test_fn(fn, name):
graph = tf.Graph()
with graph.as_default():
try:
print('****************')
print(name)
print('****************')
out1, out2 = fn()
report(out1, out2)
print('----------------')
print('SUCCESS')
print('----------------')
except Exception:
print('----------------')
print('FAILED')
print('----------------')
for fn, name in [
[simple, 'simple'],
[no_scoping, 'no_scoping'],
[different_name_scopes, 'different_name_scopes'],
[same_name_scope, 'same_name_scope'],
[different_variable_scopes, 'different_variable_scopes'],
[same_variable_scope, 'same_variable_scope'],
[same_variable_scope_reuse, 'same_variable_scope_reuse']
]:
test_fn(fn, name)
Results:
****************
simple
****************
add:0
add_1:0
[u'A:0', u'b:0']
----------------
SUCCESS
----------------
****************
no_scoping
****************
----------------
FAILED
----------------
****************
different_name_scopes
****************
----------------
FAILED
----------------
****************
same_name_scope
****************
----------------
FAILED
----------------
****************
different_variable_scopes
****************
first_layer/add:0
second_layer/add:0
[u'first_layer/A:0', u'first_layer/b:0', u'second_layer/A:0', u'second_layer/b:0']
----------------
SUCCESS
----------------
****************
same_variable_scope
****************
----------------
FAILED
----------------
****************
same_variable_scope_reuse
****************
first_layer/add:0
first_layer_1/add:0
[u'first_layer/A:0', u'first_layer/b:0']
----------------
SUCCESS
----------------
Note that using different variable_scopes without reuse doesn't raise an error, but creates multiple copies of A and b, which may not be intended.
Related
I am working on an artifical neural network which I have created via subclassing.
The subclassing looks like this:
import time
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import scipy.stats as si
import sympy as sy
from sympy.stats import Normal, cdf
from sympy import init_printing
class DGMNet(tf.keras.Model):
def __init__(self, n_layers, n_nodes, dimensions=1):
"""
Parameters:
- n_layers: number of layers
- n_nodes: number of nodes in (inner) layers
- dimensions: number of spacial dimensions
"""
super().__init__()
self.n_layers = n_layers
self.initial_layer = DenseLayer(dimensions + 1, n_nodes, activation="relu")
self.lstmlikelist = []
for _ in range(self.n_layers):
self.lstmlikelist.append(LSTMLikeLayer(dimensions + 1, n_nodes, activation="relu"))
self.final_layer = DenseLayer(n_nodes, 1, activation=None)
def call(self, t, x):
X = tf.concat([t,x], 1)
S = self.initial_layer.call(X)
for i in range(self.n_layers):
S = self.lstmlikelist[i].call({'S': S, 'X': X})
result = self.final_layer.call(S)
return result
class DenseLayer(tf.keras.layers.Layer):
def __init__(self, n_inputs, n_outputs, activation):
"""
Parameters:
- n_inputs: number of inputs
- n_outputs: number of outputs
- activation: activation function
"""
super(DenseLayer, self).__init__()
self.n_inputs = n_inputs
self.n_outputs = n_outputs
self.W = self.add_weight(shape=(self.n_inputs, self.n_outputs),
initializer='random_normal',
trainable=True)
self.b = self.add_weight(shape=(1, self.n_outputs),
initializer='random_normal',
trainable=True)
self.activation = _get_function(activation)
def call(self, inputs):
S = tf.add(tf.matmul(inputs, self.W), self.b)
S = self.activation(S)
return S
class LSTMLikeLayer(tf.keras.layers.Layer):
def __init__(self, n_inputs, n_outputs, activation):
"""
Parameters:
- n_inputs: number of inputs
- n_outputs: number of outputs
- activation: activation function
"""
super(LSTMLikeLayer, self).__init__()
self.n_outputs = n_outputs
self.n_inputs = n_inputs
self.Uz = self.add_variable("Uz", shape=[self.n_inputs, self.n_outputs])
self.Ug = self.add_variable("Ug", shape=[self.n_inputs, self.n_outputs])
self.Ur = self.add_variable("Ur", shape=[self.n_inputs, self.n_outputs])
self.Uh = self.add_variable("Uh", shape=[self.n_inputs, self.n_outputs])
self.Wz = self.add_variable("Wz", shape=[self.n_outputs, self.n_outputs])
self.Wg = self.add_variable("Wg", shape=[self.n_outputs, self.n_outputs])
self.Wr = self.add_variable("Wr", shape=[self.n_outputs, self.n_outputs])
self.Wh = self.add_variable("Wh", shape=[self.n_outputs, self.n_outputs])
self.bz = self.add_variable("bz", shape=[1, self.n_outputs])
self.bg = self.add_variable("bg", shape=[1, self.n_outputs])
self.br = self.add_variable("br", shape=[1, self.n_outputs])
self.bh = self.add_variable("bh", shape=[1, self.n_outputs])
self.activation = _get_function(activation)
def call(self, inputs):
S = inputs['S']
X = inputs['X']
Z = self.activation(tf.add(tf.add(tf.matmul(X, self.Uz), tf.matmul(S, self.Wz)), self.bz))
G = self.activation(tf.add(tf.add(tf.matmul(X, self.Ug), tf.matmul(S, self.Wg)), self.bg))
R = self.activation(tf.add(tf.add(tf.matmul(X, self.Ur), tf.matmul(S, self.Wr)), self.br))
H = self.activation(tf.add(tf.add(tf.matmul(X, self.Uh), tf.matmul(tf.multiply(S, R), self.Wh)), self.bh))
Snew = tf.add(tf.multiply(tf.subtract(tf.ones_like(G), G), H), tf.multiply(Z, S))
return Snew
def _get_function(name):
f = None
if name == "tanh":
f = tf.nn.tanh
elif name == "sigmoid":
f = tf.nn.sigmoid
elif name == "relu":
f = tf.nn.relu
elif not name:
f = tf.identity
assert f is not None
return f
# Sampling
def sampler(N1, N2, N3):
np.random.seed(42)
# Sampler #1: PDE domain
t1 = np.random.uniform(low=T0,
high=T,
size=[N1,1])
s1 = np.random.uniform(low=S1,
high=S2,
size=[N1,1])
# Sampler #2: boundary condition
t2 = np.zeros(shape=(1, 1))
s2 = np.zeros(shape=(1, 1))
# Sampler #3: initial/terminal condition
t3 = T * np.ones((N3,1)) #Terminal condition
s3 = np.random.uniform(low=S1,
high=S2,
size=[N3,1])
return (t1, s1, t2, s2, t3, s3)
# Loss function
def loss(model, t1, x1, t2, x2, t3, x3):
# Loss term #1: PDE
V = model(t1, x1)
V_t = tf.gradients(V, t1)[0]
V_x = tf.gradients(V, x1)[0]
V_xx = tf.gradients(V_x, x1)[0]
f = V_t + r*x1*V_x + 0.5*sigma**2*x1**2*V_xx - r*V
L1 = tf.reduce_mean(tf.square(f))
# Loss term #2: boundary condition
#L2 = tf.reduce_mean(tf.square(V))
# Loss term #3: initial/terminal condition
L3 = tf.reduce_mean(tf.square(model(t3, x3) - tf.math.maximum(x3-K,0)))
return (L1, L3)
# B-S's analytical known solution
def analytical_solution(t, x):
#C = SN(d1) - Xe- rt N(d2)
#S: spot price
#K: strike price
#T: time to maturity
#r: interest rate
#sigma: volatility of underlying asset
d1 = (np.log(x / K) + (r + 0.5 * sigma ** 2) * T) / (sigma * np.sqrt(T))
d2 = (np.log(x / K) + (r - 0.5 * sigma ** 2) * T) / (sigma * np.sqrt(T))
call = (x * si.norm.cdf(d1, 0.0, 1.0) - K * np.exp(-r * T) * si.norm.cdf(d2, 0.0, 1.0))
return call
# Set random seeds
np.random.seed(42)
tf.random.set_seed(42)
# Strike price
K = 0.5
# PDE parameters
r = 0.05 # Interest rate
sigma = 0.25 # Volatility
# Time limits
T0 = 0.0 + 1e-10 # Initial time
T = 1.0 # Terminal time
# Space limits
S1 = 0.0 + 1e-10 # Low boundary
S2 = 1.0 # High boundary
# Number of samples
NS_1 = 1000
NS_2 = 0
NS_3 = 100
t1, s1, t2, s2, t3, s3 = sampler(NS_1, NS_2, NS_3)
Now what I want to do is to iterate over different parameters and create a new ann for each iteration.
My plan was to do it in this way:
tf.compat.v1.disable_eager_execution()
t1_t = tf.compat.v1.placeholder(tf.float32, [None,1])
x1_t = tf.compat.v1.placeholder(tf.float32, [None,1])
t2_t = tf.compat.v1.placeholder(tf.float32, [None,1])
x2_t = tf.compat.v1.placeholder(tf.float32, [None,1])
t3_t = tf.compat.v1.placeholder(tf.float32, [None,1])
x3_t = tf.compat.v1.placeholder(tf.float32, [None,1])
volatility_list = [0.08]#[0.08, 0.16, 0.18, 0.2, 0.28]
stages_list = [10]#, 50, 100]
layers_list = [3]#, 5, 7]
npl_list = [3]#, 6, 9, 12, 15]
for sigma in volatility_list:
for st in stages_list:
for lay in layers_list:
for npl in npl_list:
# Neural Network definition
num_layers = lay
nodes_per_layer = npl
ann = DGMNet(num_layers, nodes_per_layer)
L1_t, L3_t = loss(ann, t1_t, x1_t, t2_t, x2_t, t3_t, x3_t)
loss_t = L1_t + L3_t
# Optimizer parameters
global_step = tf.Variable(1, trainable=False)
starter_learning_rate = 0.001
learning_rate = tf.compat.v1.train.exponential_decay(starter_learning_rate, global_step,
100000, 0.96, staircase=True)
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss_t)
# Training parameters
steps_per_sample = st
sampling_stages = 100#2000
# Plot tensors
tplot_t = tf.compat.v1.placeholder(tf.float32, [None,1], name="tplot_t") # We name to recover it later
xplot_t = tf.compat.v1.placeholder(tf.float32, [None,1], name="xplot_t")
vplot_t = tf.identity(ann(tplot_t, xplot_t), name="vplot_t") # Trick for naming the trained model
# Training data holders
sampling_stages_list = []
elapsed_time_list = []
loss_list = []
L1_list = []
L3_list = []
# Train network!!
init_op = tf.compat.v1.global_variables_initializer()
sess = tf.compat.v1.Session()
sess.run(init_op)
for i in range(sampling_stages):
t1, x1, t2, x2, t3, x3 = sampler(NS_1, NS_2, NS_3)
start_time = time.clock()
for _ in range(steps_per_sample):
loss, L1, L3, _ = sess.run([loss_t, L1_t, L3_t, optimizer],
feed_dict = {t1_t:t1, x1_t:x1, t2_t:t2, x2_t:x2, t3_t:t3, x3_t:x3})
end_time = time.clock()
elapsed_time = end_time - start_time
sampling_stages_list.append(i)
elapsed_time_list.append(elapsed_time)
loss_list.append(loss)
L1_list.append(L1)
L3_list.append(L3)
text = "Stage: {:04d}, Loss: {:e}, L1: {:e}, L3: {:e}, {:f} seconds".format(i, loss, L1, L3, elapsed_time)
print(text)
#goodness of fit
time_0 = 0
listofzeros = [time_0] * 100
prices_for_goodness = np.linspace(S1,S2, 100)
goodness_list = []
solution_goodness = analytical_solution(listofzeros, prices_for_goodness)
ttt = time_0*np.ones_like(prices_for_goodness.reshape(-1,1))
nn_goodness, = sess.run([vplot_t],
feed_dict={tplot_t:ttt, xplot_t:prices_for_goodness.reshape(-1,1)})
deviation_list = np.abs(solution_goodness - nn_goodness)/(T-T0)
print("{0:.2f}%".format(np.average(deviation_list)*100))
Unfortunately as soon as it ends the first iteration I get a TypeError that 'numpy.float32' object is not callable
Error Traceback:
TypeError Traceback (most recent call last)
<ipython-input-14-bb14643d0c42> in <module>()
10
11
---> 12 L1_t, L3_t = loss(ann, t1_t, x1_t, t2_t, x2_t, t3_t, x3_t)
13 loss_t = L1_t + L3_t
14
TypeError: 'numpy.float32' object is not callable
I guess that the problem is with the creation of the placeholders, however I am not sure how to solve it. Maybe one of you can help me
Thanks in advance!
Chris
Did you create a variable called 'loss'? It seems that the loss function is redefined by a variable with the same name, so then python tries to call that variable as a function.
The following snippet is from a fairly large piece of code but hopefully I can give all the information necessary:
y2 = tf.matmul(y1,ymask)
dist = tf.norm(ystar-y2,axis=0)
y1 and y2 are 128x30 and ymask is 30x30. ystar is 128x30. dist is 1x30. When ymask is the identity matrix, everything works fine. But when I set it to be all zeros, apart from a single 1 along the diagonal (so as to set all columns but one in y2 to be zero), I get nans for the gradient of dist with respect to y2, using tf.gradients(dist, [y2]). The specific value of dist is [0,0,7.9,0,...], with all the ystar-y2 values being around the range (-1,1) in the third column and zero elsewhere.
I'm pretty confused as to why a numerical issue would occur here, given there are no logs or divisions, is this underflow? Am I missing something in the maths?
For context, I'm doing this to try to train individual dimensions of y, one at a time, using the whole network.
longer version to reproduce:
import tensorflow as tf
import numpy as np
import pandas as pd
batchSize = 128
eta = 0.8
tasks = 30
imageSize = 32**2
groups = 3
tasksPerGroup = 10
trainDatapoints = 10000
w = np.zeros([imageSize, groups * tasksPerGroup])
toyIndex = 0
for toyLoop in range(groups):
m = np.ones([imageSize]) * np.random.randn(imageSize)
for taskLoop in range(tasksPerGroup):
w[:, toyIndex] = m * 0.1 * np.random.randn(1)
toyIndex += 1
xRand = np.random.normal(0, 0.5, (trainDatapoints, imageSize))
taskLabels = np.matmul(xRand, w) + np.random.normal(0,0.5,(trainDatapoints, groups * tasksPerGroup))
DF = np.concatenate((xRand, taskLabels), axis=1)
trainDF = pd.DataFrame(DF[:trainDatapoints, ])
# define graph variables
x = tf.placeholder(tf.float32, [None, imageSize])
W = tf.Variable(tf.zeros([imageSize, tasks]))
b = tf.Variable(tf.zeros([tasks]))
ystar = tf.placeholder(tf.float32, [None, tasks])
ymask = tf.placeholder(tf.float32, [tasks, tasks])
dataLength = tf.cast(tf.shape(ystar)[0],dtype=tf.float32)
y1 = tf.matmul(x, W) + b
y2 = tf.matmul(y1,ymask)
dist = tf.norm(ystar-y2,axis=0)
mse = tf.reciprocal(dataLength) * tf.reduce_mean(tf.square(dist))
grads = tf.gradients(dist, [y2])
trainStep = tf.train.GradientDescentOptimizer(eta).minimize(mse)
# build graph
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
randTask = np.random.randint(0, 9)
ymaskIn = np.zeros([tasks, tasks])
ymaskIn[randTask, randTask] = 1
batch = trainDF.sample(batchSize)
batch_xs = batch.iloc[:, :imageSize]
batch_ys = np.zeros([batchSize, tasks])
batch_ys[:, randTask] = batch.iloc[:, imageSize + randTask]
gradOut = sess.run(grads, feed_dict={x: batch_xs, ystar: batch_ys, ymask: ymaskIn})
sess.run(trainStep, feed_dict={x: batch_xs, ystar: batch_ys, ymask:ymaskIn})
Here's a very simple reproduction:
import tensorflow as tf
with tf.Graph().as_default():
y = tf.zeros(shape=[1], dtype=tf.float32)
dist = tf.norm(y,axis=0)
(grad,) = tf.gradients(dist, [y])
with tf.Session():
print(grad.eval())
Prints:
[ nan]
The issue is that tf.norm computes sum(x**2)**0.5. The gradient is x / sum(x**2) ** 0.5 (see e.g. https://math.stackexchange.com/a/84333), so when sum(x**2) is zero we're dividing by zero.
There's not much to be done in terms of a special case: the gradient as x approaches all zeros depends on which direction it's approaching from. For example if x is a single-element vector, the limit as x approaches 0 could either be 1 or -1 depending on which side of zero it's approaching from.
So in terms of solutions, you could just add a small epsilon:
import tensorflow as tf
def safe_norm(x, epsilon=1e-12, axis=None):
return tf.sqrt(tf.reduce_sum(x ** 2, axis=axis) + epsilon)
with tf.Graph().as_default():
y = tf.constant([0.])
dist = safe_norm(y,axis=0)
(grad,) = tf.gradients(dist, [y])
with tf.Session():
print(grad.eval())
Prints:
[ 0.]
Note that this is not actually the Euclidean norm. It's a good approximation as long as the input is much larger than epsilon.
I'm wonder wrt this topic
I want to resolve update issue in Theano.function with this lazy tensorflow constrution:
class TensorFlowTheanoFunction(object):
def __init__(self, inputs, outputs, session):
self._inputs = inputs
self._outputs = outputs
self.session = session
def __call__(self, *args, **kwargs):
feeds = {}
for (argpos, arg) in enumerate(args):
feeds[self._inputs[argpos]] = arg
return self.session.run(self._outputs, feeds)
If I want to pass an update argument (like in Theano) how I can modify this lazy call?
I just want that this can also work in tensorflow:
self.new = theano.function([], [], updates=zip(old_params, params))
Just modifying Yaroslav's code from that thread to use tf.assign, with a control dependency to make sure the outputs are computed before the assignments happen:
import tensorflow as tf
class TensorFlowTheanoFunction(object):
def __init__(self, inputs, outputs, updates=()):
self._inputs = inputs
self._outputs = outputs
self._updates = updates
def __call__(self, *args, **kwargs):
feeds = {}
for (argpos, arg) in enumerate(args):
feeds[self._inputs[argpos]] = arg
try:
outputs_identity = [tf.identity(output) for output in self._outputs]
output_is_list = True
except TypeError:
outputs_identity = [tf.identity(self._outputs)]
output_is_list = False
with tf.control_dependencies(outputs_identity):
assign_ops = [tf.assign(variable, replacement)
for variable, replacement in self._updates]
outputs_list = tf.get_default_session().run(
outputs_identity + assign_ops, feeds)[:len(outputs_identity)]
if output_is_list:
return outputs_list
else:
assert len(outputs_list) == 1
return outputs_list[0]
a = tf.placeholder(dtype=tf.int32)
b = tf.placeholder(dtype=tf.int32)
variable = tf.get_variable(
"variable", shape=[], dtype=tf.int32, initializer=tf.zeros_initializer)
c = a + b + variable
d = a - b
sess = tf.InteractiveSession()
sess.run(tf.initialize_all_variables())
f = TensorFlowTheanoFunction([a, b], [c, d], updates=[(variable, variable + 1)])
print f(1, 2)
print f(1, 2)
print f(0, 2)
f = TensorFlowTheanoFunction([a, b], c, updates=[(variable, variable + 1)])
print f(1, 2)
print f(1, 2)
print f(0, 2)
This updates the variable at each iteration:
[3, -1]
[4, -1]
[4, -2]
6
7
7
I have a RNN like structure that has some building blocks (component neural networks) that are passed in by the user. Here is a minimal example:
import tensorflow as tf
tf.reset_default_graph()
def initialize(shape):
init = tf.random_normal(shape, mean=0, stddev=0.1, dtype=tf.float32)
return init
def test_rnn_with_external(input, hiddens, external_fct):
"""
A simple rnn that makes the standard update, then
feeds the new hidden state through some external
function.
"""
dim_in = input.get_shape().as_list()[-1]
btsz = input.get_shape().as_list()[1]
shape = (dim_in + hiddens, hiddens)
_init = initialize(shape)
W = tf.get_variable("rnn_w", initializer=_init)
_init = tf.zeros([hiddens])
b = tf.get_variable("rnn_b", initializer=_init)
def _step(previous, input):
concat = tf.concat(1, [input, previous])
h_t = tf.tanh(tf.add(tf.matmul(concat, W), b))
h_t = external_fct(h_t)
return h_t
h_0 = tf.zeros([btsz, hiddens])
states = tf.scan(_step,
input,
initializer=h_0,
name="states")
return states
# the external function, relying on the templating mechanism.
def ext_fct(hiddens):
"""
"""
def tmp(input):
shape = (hiddens, hiddens)
_init = initialize(shape)
W = tf.get_variable("ext_w", initializer=_init)
b = 0
return tf.add(tf.matmul(input, W), b, name="external")
return tf.make_template(name_="external_fct", func_=tmp)
# run from here on
t = 5
btsz = 4
dim = 2
hiddens = 3
x = tf.placeholder(tf.float32, shape=(t, btsz, dim))
ext = ext_fct(hiddens)
states = test_rnn_with_external(x, hiddens, external_fct=ext)
sess = tf.InteractiveSession()
sess.run(tf.initialize_all_variables())
with the error ending in:
InvalidArgumentError: All inputs to node external_fct/ext_w/Assign must be from the same frame.
With Frame, I would associate an area on the stack. So I thought that maybe tf.make_template does something very wired, and thus it is not useable here. The external function can be rewritten a bit and then called more directly, like so:
import tensorflow as tf
tf.reset_default_graph()
def initialize(shape):
init = tf.random_normal(shape, mean=0, stddev=0.1, dtype=tf.float32)
return init
def test_rnn_with_external(input, hiddens, external_fct):
dim_in = input.get_shape().as_list()[-1]
btsz = input.get_shape().as_list()[1]
shape = (dim_in + hiddens, hiddens)
_init = initialize(shape)
W = tf.get_variable("rnn_w", initializer=_init)
_init = tf.zeros([hiddens])
b = tf.get_variable("rnn_b", initializer=_init)
def _step(previous, input):
"""
"""
concat = tf.concat(1, [input, previous])
h_t = tf.tanh(tf.add(tf.matmul(concat, W), b))
h_t = external_fct(h_t, hiddens)
return h_t
h_0 = tf.zeros([btsz, hiddens])
states = tf.scan(_step,
input,
initializer=h_0,
name="states")
return states
def ext_fct_new(input, hiddens):
"""
"""
shape = (hiddens, hiddens)
_init = initialize(shape)
W = tf.get_variable("ext_w_new", initializer=_init)
b = 0
return tf.add(tf.matmul(input, W), b, name="external_new")
t = 5
btsz = 4
dim = 2
hiddens = 3
x = tf.placeholder(tf.float32, shape=(t, btsz, dim))
states = test_rnn_with_external(x, hiddens, external_fct=ext_fct_new)
sess = tf.InteractiveSession()
sess.run(tf.initialize_all_variables())
However, still the same error InvalidArgumentError: All inputs to node ext_w_new/Assign must be from the same frame.
Of course, moving contents of the external function into the _step part (and tf.get_variableing before) works. But then the flexibility (necessary in the original code) is gone.
What am I doing wrong? Any help/tips/pointers is greatly appreciated.
(Note: Asked this on github, too: https://github.com/tensorflow/tensorflow/issues/4478)
Using a tf.constant_initializer solves the problem. This is described here.
Suppose I have two neural network model defined each with 1 input placeholder and 1 output tensor. From these 2 outputs I need 3 separate values.
inputs: i1, i2, outputs: o1, o2
a = 1
b = 2
v1 = session.run(o1, feed_dict={i1: a})
v2 = session.run(o1, feed_dict={i1: b})
v3 = session.run(o2, feed_dict={i2: a})
The problem is I need to feed these 3 values into a loss function so I can't do the above. I need to do
loss = session.run(L, feed_dict={i1: a, i1: b, i2:a })
I don't think I can do that but even if I could I would still have the ambiguity in later operations since o1 with input i1 is used differently than o1 with input i2.
I think it could be solved by having 2 input placeholders and 2 outputs in the first neural network. So given I already have a model is there a way to restructure the inputs and outputs so that I can accommodate this?
Visually I want to turn
i1 ---- (model) ----- o1
into
i1a o1a
\ /
\ /
x ----- (model) ----- x
/ \
/ \
i1b o1b
Your intuition is right, you have to create 2 different placeholders i1a and i1b for your network 1, with two outputs o1a and o1b. Your visuals look great so here is my proposition:
i1a ----- (model) ----- o1a
|
shared weights
|
i1b ----- (model) ----- o1b
The proper way to do that is to duplicate your network by using tf.get_variable() for every variable with reuse=True.
def create_variables():
with tf.variable_scope('model'):
w1 = tf.get_variable('w1', [1, 2])
b1 = tf.get_variable('b1', [2])
def inference(input):
with tf.variable_scope('model', reuse=True):
w1 = tf.get_variable('w1')
b1 = tf.get_variable('b1')
output = tf.matmul(input, w1) + b1
return output
create_variables()
i1a = tf.placeholder(tf.float32, [3, 1])
o1a = inference(i1a)
i1b = tf.placeholder(tf.float32, [3, 1])
o1b = inference(i1b)
loss = tf.reduce_mean(o1a - o1b)
with tf.Session() as sess:
sess.run(tf.initialize_all_variables())
sess.run(loss, feed_dict={i1a: [[0.], [1.], [2.]], i1b: [[0.5], [1.5], [2.5]]})