Tensorflow: Calling externally set-up function in `tf.scan` (e.g using `tf.make_template`) results in error - tensorflow

I have a RNN like structure that has some building blocks (component neural networks) that are passed in by the user. Here is a minimal example:
import tensorflow as tf
tf.reset_default_graph()
def initialize(shape):
init = tf.random_normal(shape, mean=0, stddev=0.1, dtype=tf.float32)
return init
def test_rnn_with_external(input, hiddens, external_fct):
"""
A simple rnn that makes the standard update, then
feeds the new hidden state through some external
function.
"""
dim_in = input.get_shape().as_list()[-1]
btsz = input.get_shape().as_list()[1]
shape = (dim_in + hiddens, hiddens)
_init = initialize(shape)
W = tf.get_variable("rnn_w", initializer=_init)
_init = tf.zeros([hiddens])
b = tf.get_variable("rnn_b", initializer=_init)
def _step(previous, input):
concat = tf.concat(1, [input, previous])
h_t = tf.tanh(tf.add(tf.matmul(concat, W), b))
h_t = external_fct(h_t)
return h_t
h_0 = tf.zeros([btsz, hiddens])
states = tf.scan(_step,
input,
initializer=h_0,
name="states")
return states
# the external function, relying on the templating mechanism.
def ext_fct(hiddens):
"""
"""
def tmp(input):
shape = (hiddens, hiddens)
_init = initialize(shape)
W = tf.get_variable("ext_w", initializer=_init)
b = 0
return tf.add(tf.matmul(input, W), b, name="external")
return tf.make_template(name_="external_fct", func_=tmp)
# run from here on
t = 5
btsz = 4
dim = 2
hiddens = 3
x = tf.placeholder(tf.float32, shape=(t, btsz, dim))
ext = ext_fct(hiddens)
states = test_rnn_with_external(x, hiddens, external_fct=ext)
sess = tf.InteractiveSession()
sess.run(tf.initialize_all_variables())
with the error ending in:
InvalidArgumentError: All inputs to node external_fct/ext_w/Assign must be from the same frame.
With Frame, I would associate an area on the stack. So I thought that maybe tf.make_template does something very wired, and thus it is not useable here. The external function can be rewritten a bit and then called more directly, like so:
import tensorflow as tf
tf.reset_default_graph()
def initialize(shape):
init = tf.random_normal(shape, mean=0, stddev=0.1, dtype=tf.float32)
return init
def test_rnn_with_external(input, hiddens, external_fct):
dim_in = input.get_shape().as_list()[-1]
btsz = input.get_shape().as_list()[1]
shape = (dim_in + hiddens, hiddens)
_init = initialize(shape)
W = tf.get_variable("rnn_w", initializer=_init)
_init = tf.zeros([hiddens])
b = tf.get_variable("rnn_b", initializer=_init)
def _step(previous, input):
"""
"""
concat = tf.concat(1, [input, previous])
h_t = tf.tanh(tf.add(tf.matmul(concat, W), b))
h_t = external_fct(h_t, hiddens)
return h_t
h_0 = tf.zeros([btsz, hiddens])
states = tf.scan(_step,
input,
initializer=h_0,
name="states")
return states
def ext_fct_new(input, hiddens):
"""
"""
shape = (hiddens, hiddens)
_init = initialize(shape)
W = tf.get_variable("ext_w_new", initializer=_init)
b = 0
return tf.add(tf.matmul(input, W), b, name="external_new")
t = 5
btsz = 4
dim = 2
hiddens = 3
x = tf.placeholder(tf.float32, shape=(t, btsz, dim))
states = test_rnn_with_external(x, hiddens, external_fct=ext_fct_new)
sess = tf.InteractiveSession()
sess.run(tf.initialize_all_variables())
However, still the same error InvalidArgumentError: All inputs to node ext_w_new/Assign must be from the same frame.
Of course, moving contents of the external function into the _step part (and tf.get_variableing before) works. But then the flexibility (necessary in the original code) is gone.
What am I doing wrong? Any help/tips/pointers is greatly appreciated.
(Note: Asked this on github, too: https://github.com/tensorflow/tensorflow/issues/4478)

Using a tf.constant_initializer solves the problem. This is described here.

Related

Triplet-Loss using pre-trained network

I am trying to use the Triple-Loss technique to fine-tune an EfficientNet network for human Re-ID using Keras. Here is the code I am using:
This is the generator:
class SampleGen(object):
def __init__(self, file_class_mapping):
self.file_class_mapping = file_class_mapping
self.class_to_list_files = defaultdict(list)
self.list_all_files = list(file_class_mapping.keys())
self.range_all_files = list(range(len(self.list_all_files)))
for file, class_ in file_class_mapping.items():
self.class_to_list_files[class_].append(file)
self.list_classes = list(set(self.file_class_mapping.values()))
self.range_list_classes = range(len(self.list_classes))
self.class_weight = np.array([len(self.class_to_list_files[class_]) for class_ in self.list_classes])
self.class_weight = self.class_weight / np.sum(self.class_weight)
def get_sample(self):
class_idx = np.random.choice(self.range_list_classes, 1, p=self.class_weight)[0]
examples_class_idx = np.random.choice(range(len(self.class_to_list_files[self.list_classes[class_idx]])), 2)
positive_example_1, positive_example_2 = \
self.class_to_list_files[self.list_classes[class_idx]][examples_class_idx[0]], \
self.class_to_list_files[self.list_classes[class_idx]][examples_class_idx[1]]
negative_example = None
while negative_example is None or self.file_class_mapping[negative_example] == \
self.file_class_mapping[positive_example_1]:
negative_example_idx = np.random.choice(self.range_all_files, 1)[0]
negative_example = self.list_all_files[negative_example_idx]
return positive_example_1, negative_example, positive_example_2
def read_and_resize(filepath):
im = Image.open((filepath)).convert('RGB')
im = im.resize((image_size, image_size))
return np.array(im, dtype="float32")
def augment(im_array):
if np.random.uniform(0, 1) > 0.9:
im_array = np.fliplr(im_array)
return im_array
def gen(triplet_gen):
while True:
list_positive_examples_1 = []
list_negative_examples = []
list_positive_examples_2 = []
for i in range(batch_size):
positive_example_1, negative_example, positive_example_2 = triplet_gen.get_sample()
path_pos1 = join(path_train, positive_example_1)
path_neg = join(path_train, negative_example)
path_pos2 = join(path_train, positive_example_2)
positive_example_1_img = read_and_resize(path_pos1)
negative_example_img = read_and_resize(path_neg)
positive_example_2_img = read_and_resize(path_pos2)
positive_example_1_img = augment(positive_example_1_img)
negative_example_img = augment(negative_example_img)
positive_example_2_img = augment(positive_example_2_img)
list_positive_examples_1.append(positive_example_1_img)
list_negative_examples.append(negative_example_img)
list_positive_examples_2.append(positive_example_2_img)
A = preprocess_input(np.array(list_positive_examples_1))
B = preprocess_input(np.array(list_positive_examples_2))
C = preprocess_input(np.array(list_negative_examples))
label = None
yield {'anchor_input': A, 'positive_input': B, 'negative_input': C}, label
This is how I create the model:
def get_model():
base_model = efn.EfficientNetB3(weights='imagenet', include_top=False)
for layer in base_model.layers:
layer.trainable = False
x = base_model.output
x = Dropout(0.6)(x)
x = Dense(embedding_dim)(x)
x = Lambda(lambda x: K.l2_normalize(x, axis=1), name="enc_out")(x)
embedding_model = Model(base_model.input, x, name="embedding")
input_shape = (image_size, image_size, 3)
anchor_input = Input(input_shape, name='anchor_input')
positive_input = Input(input_shape, name='positive_input')
negative_input = Input(input_shape, name='negative_input')
anchor_embedding = embedding_model(anchor_input)
positive_embedding = embedding_model(positive_input)
negative_embedding = embedding_model(negative_input)
inputs = [anchor_input, positive_input, negative_input]
outputs = [anchor_embedding, positive_embedding, negative_embedding]
triplet_model = Model(inputs, outputs)
triplet_model.add_loss(K.mean(triplet_loss(outputs)))
return embedding_model, triplet_model
And this is how I'm trying to run the training:
if __name__ == '__main__':
data = pd.read_csv(path_csv)
train, test = train_test_split(data, train_size=0.7, random_state=1337)
file_id_mapping_train = {k: v for k, v in zip(train.Image.values, train.Id.values)}
file_id_mapping_test = {k: v for k, v in zip(test.Image.values, test.Id.values)}
gen_tr = gen(SampleGen(file_id_mapping_train))
gen_te = gen(SampleGen(file_id_mapping_test))
embedding_model, triplet_model = get_model()
for i, layer in enumerate(embedding_model.layers):
print(i, layer.name, layer.trainable)
for layer in embedding_model.layers[379:]:
layer.trainable = True
for layer in embedding_model.layers[:379]:
layer.trainable = False
triplet_model.compile(loss=None, optimizer=Adam(0.0001))
history = triplet_model.fit(x=gen_tr,
validation_data=gen_te,
epochs=10,
verbose=1,
steps_per_epoch=200,
validation_steps=20,
callbacks=create_callbacks())
The csv contains two columns (Image, Id) and I am generating triplets on the go using a generator. The layer 379 is the last layer of the network so I just leave that as trainable. I let it run for some epochs and it seems like it doesn't converge, it stays around 2.30. On epochs like 20, the loss is even higher than what I've started with. Here you can see what I mean: train example Is there anything wrong with the way I think about the problem?
Thank you!

ValueError: No gradients provided for any variable tensorflow 2.0

I am using tensorflow 2.0 and trying to make a actor critic algorithm to play the game of cartpole. I have done everything right but getting the following error: ValueError: No gradients provided for any variable: ['dense/kernel:0', 'dense/bias:0', 'dense_1/kernel:0', 'dense_1/bias:0'].
Please help me out
Here is my code:
import gym
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
MAX_EPISODES = 2000
GAMMA = 0.9
LR_A = 0.001
LR_C = 0.01
env = gym.make("CartPole-v0")
N_ACTIONS = env.action_space.n
N_FEATURES = 4
def make_actor(n_features, n_actions):
inputs = tf.keras.Input(shape=[n_features])
hidden = tf.keras.layers.Dense(20, activation=tf.nn.relu)(inputs)
dist = tf.keras.layers.Dense(n_actions, activation=tf.nn.softmax)(hidden)
model = tf.keras.Model(inputs=inputs, outputs=dist)
return model
def make_critic(n_features):
inputs = tf.keras.Input(shape=[n_features])
hidden = tf.keras.layers.Dense(20, activation=tf.nn.relu)(inputs)
value = tf.keras.layers.Dense(1)(hidden)
model = tf.keras.Model(inputs=inputs, outputs=value)
return model
actor = make_actor(N_FEATURES, N_ACTIONS)
critic = make_critic(N_FEATURES)
actor.summary()
critic.summary()
actor_optimizer = tf.keras.optimizers.Adam(LR_A)
critic_optimizer = tf.keras.optimizers.Adam(LR_C)
def loss_actor(s, a, td_error):
dist = actor(s.reshape(1, 4)).numpy()
log_prob = np.log(dist[0, a])
exp_v = np.mean(log_prob * td_error)
return tf.multiply(exp_v, -1)
def loss_critic(s, s_, r, gamma):
s, s_ = s[np.newaxis, :], s_[np.newaxis, :]
v = critic(s)
v_ = critic(s_)
td_error = r + gamma * v_ - v
return tf.multiply(td_error, 1)
def train(max_episodes):
for episode in range(max_episodes):
s = env.reset().astype(np.float32)
t = 0
track_r = []
while True:
dist = actor(s.reshape(1, 4)).numpy()
a = np.random.choice(range(N_ACTIONS), p=dist.ravel())
s_, r, done, info = env.step(a)
s_ = s_.astype(np.float32)
if done: r=-20
track_r.append(r)
with tf.GradientTape() as cri_tape, tf.GradientTape() as act_tape:
td_error = loss_critic(s, s_, r, GAMMA)
gradient = cri_tape.gradient(td_error, critic.trainable_variables)
critic_optimizer.apply_gradients(zip(gradient,critic.trainable_variables))
with tf.GradientTape() as act_tape:
neg_exp_v = loss_actor(s, a, td_error.numpy())
gradient = act_tape.gradient(neg_exp_v, critic.trainable_variables)
actor_optimizer.apply_gradients(zip(gradient, actor.trainable_variables))
s = s_
t += 1
if done:
print("Episode:{} Steps:{}".format(episode+1, t))
train(MAX_EPISODES)
The error is on line 69:actor_optimizer.apply_gradients(zip(gradient, actor.trainable_variables))
When I tried to print out the gradients for the actor the result was None.
I am really not getting where the problem is.

Convolve 2 tensors within a model - Keras

I need to convolve 2 tensors and then apply convolutions to the output.
I am using a lambda layer for this but I am not able to make this work.
def corr(input):
a = input[0]
b = input[1]
x = tf.nn.convolution(a, b, padding='SAME')
return x
# Apply on im1
filtered_im1 = conv1(i1)
filtered_im1 = conv2(filtered_im1)
filtered_im1 = pool(filtered_im1)
drop_filtered_im1 = drop(filtered_im1)
filtered_im1 = conv3(drop_filtered_im1)
filtered_im1 = conv4(filtered_im1)
filtered_im1 = pool2(filtered_im1)
im = drop2(filtered_im1)
# Concatenate Im with flow
imflow = Lambda(corr)([im, conv_f2]

Calling a basic LSTM cell within a custom Tensorflow cell

I'm trying to implement the MATCH LSTM from this paper: https://arxiv.org/pdf/1608.07905.pdf
I'm using Tensorflow. One part of the architecture is an RNN that uses the input and the previous state to compute an attention vector which it applies to a context before concatenating the result with the inputs and sending them into an LSTM. To build the first part of this RNN, I wrote a custom cell for Tensorflow to call. But I'm not sure how to send the results into an LSTM. Is it possible to call the basic LSTM cell within the custom cell I'm writing? I tried this a few different ways but kept getting the error "module' object has no attribute 'rnn_cell'" at the line where the LSTM cell is called. Any help would be much appreciated!
EDIT to add code:
import numpy as np
import tensorflow as tf
class MatchLSTMCell(tf.contrib.rnn.RNNCell):
def __init__(self, state_size, question_tensor, encoded_questions, batch_size):
self._state_size = state_size
self.question_tensor = question_tensor
self.encoded_questions = encoded_questions
self.batch_size = batch_size
#property
def state_size(self):
return self._state_size
#property
def output_size(self):
return self._state_size
def __call__(self, inputs, state, scope=None):
scope = scope or type(self).__name__
with tf.variable_scope(scope):
W_p = tf.get_variable("W_p", dtype=tf.float64, shape=[self.state_size, self.state_size], initializer=tf.contrib.layers.xavier_initializer())
W_r = tf.get_variable("W_r", dtype=tf.float64, shape=[self.state_size, self.state_size], initializer=tf.contrib.layers.xavier_initializer())
b_p = tf.get_variable("b_p", dtype=tf.float64, shape=[self.state_size])
w = tf.get_variable("w", dtype=tf.float64, shape=[1,self.state_size])
b = tf.get_variable("b", dtype=tf.float64, shape=[])
#print 'question tensor', np.shape(self.question_tensor)
#print 'inputs', np.shape(inputs)
#print 'insides', np.shape(tf.matmul(inputs, W_p) + tf.matmul(state, W_r) + b_p)
G = tf.nn.tanh(
tf.transpose(tf.transpose(self.question_tensor, perm=[1,0,2]) +
(tf.matmul(inputs, W_p) + tf.matmul(state, W_r) + b_p), perm=[1,0,2])
)
#print 'big G', np.shape(G)
attention_list = []
for i in range(self.batch_size):
attention_matrix = tf.matmul(G[i,:,:], tf.transpose(w))
attention_list.append(attention_matrix)
attention_scores = tf.stack(attention_list)
a = tf.nn.softmax(attention_scores + b)
a = tf.reshape(a, [self.batch_size, -1])
#print 'a shape is', np.shape(a)
weighted_question_list = []
for i in range(self.batch_size):
attention_vector = tf.matmul(tf.reshape(a[i], [1,-1]), self.encoded_questions[i])
weighted_question_list.append(attention_vector)
weighted_questions = tf.stack(weighted_question_list)
weighted_questions = tf.reshape(weighted_questions, [32, -1])
#print'weighted questions', np.shape(weighted_questions)
z = tf.concat([inputs, weighted_questions], 1)
lstm_cell = tf.nn.rnn_cell.LSTMCell(self.state_size)
output, new_state = lstm_cell.__call__(z, state)
return output, new_state
I'm also trying to reimplement Match_LSTM for Squad for experiment.
I use MurtyShikhar's as reference. It works! However, he had to customize AttentionWrapper and use existed BasicLSTM cell.
I also try to create a Match_LSTM_cell by putting z and state as (inputs,state) pair in Basic_LSTM:
def __call__(self, inputs,state):
#c is not a output. c somehow is a "memory keeper".
#Necessary to update and pass new_c through LSTM
c,h=state
#...Calculate your z
#...inputs will be each tokens in context(passage) respectively
#...Calculate alpha_Q
z=tf.concat([inputs,alpha_Q],axis=1)
########This part is reimplement of Basic_LSTM
with vs.variable_scope("LSTM_core"):
sigmoid=math_ops.sigmoid
concat=_linear([z,h],dimension*4,bias=True)
i,j,f,o=array_ops.split(concat,num_or_size_splits=4,axis=1)
new_c=(c*sigmoid(f+self._forget_bias)+sigmoid(i)*self._activation(j))
new_h = self._activation(new_c) * sigmoid(o)
new_state=(new_c,new_h)
return new_h,new_state

Theano function equivalent in Tensorflow

I'm wonder wrt this topic
I want to resolve update issue in Theano.function with this lazy tensorflow constrution:
class TensorFlowTheanoFunction(object):
def __init__(self, inputs, outputs, session):
self._inputs = inputs
self._outputs = outputs
self.session = session
def __call__(self, *args, **kwargs):
feeds = {}
for (argpos, arg) in enumerate(args):
feeds[self._inputs[argpos]] = arg
return self.session.run(self._outputs, feeds)
If I want to pass an update argument (like in Theano) how I can modify this lazy call?
I just want that this can also work in tensorflow:
self.new = theano.function([], [], updates=zip(old_params, params))
Just modifying Yaroslav's code from that thread to use tf.assign, with a control dependency to make sure the outputs are computed before the assignments happen:
import tensorflow as tf
class TensorFlowTheanoFunction(object):
def __init__(self, inputs, outputs, updates=()):
self._inputs = inputs
self._outputs = outputs
self._updates = updates
def __call__(self, *args, **kwargs):
feeds = {}
for (argpos, arg) in enumerate(args):
feeds[self._inputs[argpos]] = arg
try:
outputs_identity = [tf.identity(output) for output in self._outputs]
output_is_list = True
except TypeError:
outputs_identity = [tf.identity(self._outputs)]
output_is_list = False
with tf.control_dependencies(outputs_identity):
assign_ops = [tf.assign(variable, replacement)
for variable, replacement in self._updates]
outputs_list = tf.get_default_session().run(
outputs_identity + assign_ops, feeds)[:len(outputs_identity)]
if output_is_list:
return outputs_list
else:
assert len(outputs_list) == 1
return outputs_list[0]
a = tf.placeholder(dtype=tf.int32)
b = tf.placeholder(dtype=tf.int32)
variable = tf.get_variable(
"variable", shape=[], dtype=tf.int32, initializer=tf.zeros_initializer)
c = a + b + variable
d = a - b
sess = tf.InteractiveSession()
sess.run(tf.initialize_all_variables())
f = TensorFlowTheanoFunction([a, b], [c, d], updates=[(variable, variable + 1)])
print f(1, 2)
print f(1, 2)
print f(0, 2)
f = TensorFlowTheanoFunction([a, b], c, updates=[(variable, variable + 1)])
print f(1, 2)
print f(1, 2)
print f(0, 2)
This updates the variable at each iteration:
[3, -1]
[4, -1]
[4, -2]
6
7
7