Tensorflow overwrites scope in custom layer - tensorflow

I am trying to implement a noisy linear layer in TensorFlow, inheriting from tf.keras.layers.Layer. Everything works fine except for reusing variables. This seems to stem from some issue with the scoping: whenever I use the add_weight function from the superclass and a weight with the same name already exists, it seems to ignore the reuse flag given in the scope and creates a new variable instead. Interestingly, it does not append a 1 to the variable name as it usually does in similar cases, but instead appends the 1 to the scope name.
import tensorflow as tf

class NoisyDense(tf.keras.layers.Layer):
    def __init__(self, output_dim):
        self.output_dim = output_dim
        super(NoisyDense, self).__init__()

    def build(self, input_shape):
        self.input_dim = input_shape.as_list()[1]
        self.noisy_kernel = self.add_weight(name='noisy_kernel', shape=(self.input_dim, self.output_dim))

def noisydense(inputs, units):
    layer = NoisyDense(units)
    return layer.apply(inputs)

inputs = tf.placeholder(tf.float32, shape=(1, 10), name="inputs")

scope = "scope"
with tf.variable_scope(scope):
    inputs3 = noisydense(inputs, 1)
    my_variable = tf.get_variable("my_variable", [1, 2, 3], trainable=True)

with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
    inputs2 = noisydense(inputs, 1)
    my_variable = tf.get_variable("my_variable", [1, 2, 3], trainable=True)

tvars = tf.trainable_variables()
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    tvars_vals = sess.run(tvars)
    for var, val in zip(tvars, tvars_vals):
        print(var.name, val)
This results in the variables
scope/noisy_dense/noisy_kernel:0
scope_1/noisy_dense/noisy_kernel:0
scope/my_variable:0
being printed. I would like the layer to reuse the noisy kernel instead of creating a second one, as is done for my_variable.
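For comparison, here is a minimal sketch (not part of the snippet above) of the usual Keras idiom for sharing weights, which reuses the same layer instance rather than relying on variable_scope reuse; the class is the NoisyDense defined above:
shared_layer = NoisyDense(1)
out_a = shared_layer.apply(inputs)  # first call builds noisy_kernel
out_b = shared_layer.apply(inputs)  # second call reuses the same noisy_kernel
print(len(shared_layer.trainable_weights))  # expected: 1, only one kernel was created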

Related

TensorFlow - Error when using interleave or parallel_interleave

I'm using the tf.data.Dataset API of TensorFlow 1.12, following this Q&A, to read several .h5 files (one pre-saved batch per file) from a directory.
I first made a generator:
class generator_yield:
    def __init__(self, file):
        self.file = file

    def __call__(self):
        with h5py.File(self.file, 'r') as f:
            yield f['X'][:], f['y'][:]
Then I make a list of filenames and pass them to a Dataset:
def _fnamesmaker(dir, mode='h5'):
    fnames = []
    for dirpath, _, filenames in os.walk(dir):
        for fname in filenames:
            if fname.endswith(mode):
                fnames.append(os.path.abspath(os.path.join(dirpath, fname)))
    return fnames

fnames = _fnamesmaker('./')
len_fnames = len(fnames)
fnames = tf.data.Dataset.from_tensor_slices(fnames)
Apply the interleave method of Dataset:
# handle multiple files
ds = fnames.interleave(lambda filename: tf.data.Dataset.from_generator(
    generator_yield(filename), output_types=(tf.float32, tf.float32),
    output_shapes=(tf.TensorShape([100, 100, 1]), tf.TensorShape([100, 100, 1]))),
    cycle_length=len_fnames)
ds = ds.batch(5).shuffle(5).prefetch(5)

# init iterator
it = ds.make_initializable_iterator()
init_op = it.initializer
X_it, y_it = it.get_next()
Model:
# model
with tf.name_scope("Conv1"):
    W = tf.get_variable("W", shape=[3, 3, 1, 1],
                        initializer=tf.contrib.layers.xavier_initializer())
    b = tf.get_variable("b", shape=[1], initializer=tf.contrib.layers.xavier_initializer())
    layer1 = tf.nn.conv2d(X_it, W, strides=[1, 1, 1, 1], padding='SAME') + b
    logits = tf.nn.relu(layer1)

loss = tf.reduce_mean(tf.losses.mean_squared_error(labels=y_it, predictions=logits))
train_op = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss)
Start session:
with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(), init_op])
    while True:
        try:
            data = sess.run(train_op)
            print(data.shape)
        except tf.errors.OutOfRangeError:
            print('done.')
            break
The Error looks like:
TypeError: expected str, bytes or os.PathLike object, not Tensor
The error occurs at the __init__ method of the generator. Apparently, when interleave is applied, it is a Tensor (not a string filename) that gets passed through to the generator.
You cannot run the dataset object directly through sess.run. You have to define an iterator and get the next element. Try something like:
next_elem = ds.make_one_shot_iterator().get_next()
data = sess.run(next_elem)
You should be able to get your tensors.
According to this post, my case won't benefit in performance from parallel_interleave.
...have a transformation that transforms each element of a source
dataset into multiple elements into the destination dataset...
It's more relevant in the typical classification problem with data (dog, cat, ...) saved in separate directories. We have a segmentation problem here, which means that a label has the same dimensions as the input image. All data are stored in one directory and each .h5 file contains an image and its labels (masks).
Here, a simple map with num_parallel_calls is sufficient.
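As a rough sketch of that map-based approach (assuming each .h5 file holds one X/y pair of shape [100, 100, 1]; the helper name _read_h5 is made up for illustration):
def _read_h5(fname):
    # py_func passes the filename in as bytes, so decode before opening
    with h5py.File(fname.decode('utf-8'), 'r') as f:
        return f['X'][:].astype('float32'), f['y'][:].astype('float32')

ds = tf.data.Dataset.from_tensor_slices(_fnamesmaker('./'))
ds = ds.map(lambda fname: tf.py_func(_read_h5, [fname], (tf.float32, tf.float32)),
            num_parallel_calls=4)
ds = ds.batch(5).prefetch(5)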

Tensorflow: Replace one op with another (maybe even 2 ops)

My goal is to build a script to change an operation into another one using TF's graph editor. So far I tried making a script that just changes the input kernel weights of a Conv2D, but to no avail, as the interface is pretty confusing.
with tf.Session() as sess:
    model_filename = sys.argv[1]
    with gfile.FastGFile(model_filename, 'r') as f:
        graph_def = graph_pb2.GraphDef()
        text_format.Merge(f.read(), graph_def)
        importer.import_graph_def(graph_def)

    #my_sgv = ge.sgv("Conv2D", graph=tf.get_default_graph())
    #print(my_sgv)
    convs = find_conv2d_ops(tf.get_default_graph())
    print(convs)
    my_sgv = ge.sgv(convs)
    print(my_sgv)

    conv_tensor = tf.get_default_graph().get_tensor_by_name(convs[0].name + ':0')
    conv_weights_input = tf.get_default_graph().get_tensor_by_name(convs[0].inputs[1].name)
    weights_new = tf.Variable(tf.truncated_normal([1, 1, 1, 8], stddev=0.03),
                              name='Wnew')
    ge.graph_replace(conv_tensor, {conv_weights_input: weights_new})
The error is "input needs to be a Tensor: ". Can someone please provide some insights?
Since you are dealing with a tf.Variable, you don't need to use the graph editor; tf.assign will be sufficient.
You can use it like the following:
assign_op = tf.assign(conv_weights_input, weights_new)
with tf.Session() as sess:
    sess.run(assign_op)
If you are looking to sub out operations and not weights, consider the following example (modified from this example):
import tensorflow as tf
import tensorflow.contrib.graph_editor as ge

def build():
    a_pl = tf.placeholder(dtype=tf.float32, name="a")
    b_pl = tf.placeholder(dtype=tf.float32, name="b")
    c = tf.add(a_pl, b_pl, name="c")

build()  # or load graph from disc

a = tf.constant(1.0, shape=[2, 3], name="a_const")
b = tf.constant(2.0, shape=[2, 3], name="b_const")

a_pl = tf.get_default_graph().get_tensor_by_name("a:0")
b_pl = tf.get_default_graph().get_tensor_by_name("b:0")
c = tf.get_default_graph().get_tensor_by_name("c:0")

c_ = ge.graph_replace(c, {a_pl: a, b_pl: b})

with tf.Session() as sess:
    # no need for placeholders
    print(sess.run(c_))
    # will give error since a_pl and b_pl have no value
    print(sess.run(c))
The issue with your code is that you're dealing with weights, not tensors. The crux of the above example is that the first argument is the target (output) tensor that has the to-be-replaced tensors as dependencies. The second argument maps the tensors you want to replace to their replacements.
It's also worth noting that conv_weights_input is actually a tensor, whereas weights_new is a tf.Variable. I believe what you want is to replace conv_weights_input with weights_new, a new variable with random weight initialisation.
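As a sketch I have not tested against your graph, one way the "input needs to be a Tensor" error might be avoided is to make sure both sides of the replacement mapping are tensors, for example by converting the variable explicitly:
weights_new = tf.Variable(tf.truncated_normal([1, 1, 1, 8], stddev=0.03), name='Wnew')
# tf.convert_to_tensor turns the Variable into its value tensor before the rewrite
new_conv = ge.graph_replace(conv_tensor, {conv_weights_input: tf.convert_to_tensor(weights_new)})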

TensorFlow: Variables in bijectors cannot be reused

Describe the problem
I am trying to reuse the weights and biases of the neural network within the MaskedAutoregressiveFlow bijector, by placing it within a tf.variable_scope with reuse=tf.AUTO_REUSE. But I found that the weights and biases are not reused in practice.
Reproduce
import tensorflow as tf
from tensorflow.contrib.distributions.python.ops import bijectors as tfb

def get_bijector(name='my_bijector', reuse=None):
    """Returns a MAF bijector."""
    with tf.variable_scope(name, reuse=reuse):
        shift_and_log_scale_fn = \
            tfb.masked_autoregressive_default_template([128])
        return tfb.MaskedAutoregressiveFlow(shift_and_log_scale_fn)

x = tf.placeholder(shape=[None, 64], dtype='float32', name='x')

bijector_0 = get_bijector(reuse=tf.AUTO_REUSE)
y_0 = bijector_0.forward(x)

bijector_1 = get_bijector(reuse=tf.AUTO_REUSE)
y_1 = bijector_1.forward(x)

# We were expecting that `y_0` and `y_1` share the same dependent variables,
# since we used `tf.AUTO_REUSE` within the `tf.variable_scope`. However, the
# following will return `False`.
print(get_dependent_variables(y_0) == get_dependent_variables(y_1))
wherein we have employed the following function, which gathers all the variables a tensor depends on:
import collections

def get_dependent_variables(tensor):
    """Returns all variables that the tensor `tensor` depends on.

    Forked from: https://stackoverflow.com/a/42861919/1218716

    Args:
        tensor: Tensor.

    Returns:
        List of variables.
    """
    # Initialize
    starting_op = tensor.op
    dependent_vars = []
    queue = collections.deque()
    queue.append(starting_op)
    op_to_var = {var.op: var for var in tf.trainable_variables()}
    visited = {starting_op}
    while queue:
        op = queue.popleft()
        try:
            dependent_vars.append(op_to_var[op])
        except KeyError:
            # `op` is not a variable, so search its inputs (if any).
            for op_input in op.inputs:
                if op_input.op not in visited:
                    queue.append(op_input.op)
                    visited.add(op_input.op)
    return dependent_vars
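For what it's worth, a minimal sketch of the kind of sharing I was expecting: reusing a single bijector instance means the template created inside get_bijector is only built once, so its variables are shared. This is only an illustration, not a fix for the AUTO_REUSE behaviour above:
bijector = get_bijector(reuse=tf.AUTO_REUSE)
y_0 = bijector.forward(x)  # first call creates the MAF variables via the template
y_1 = bijector.forward(x)  # second call goes through the same template, so the variables are shared
print(get_dependent_variables(y_0) == get_dependent_variables(y_1))  # expected: True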

How do I load a checkpoint using tensorflow in eager execution mode?

I am using tensorflow 1.7.0 in eager execution mode. I have the model working, but none of the examples that I have found for saving the model work.
This is the code that I am using:
checkpoint_directory ='./JokeWords/'
checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
checkpoint = tfe.Checkpoint(model=model,optimizer=optimizer) # save as "x"
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_directory))
evaluate(model,jokes,2,32)
....
checkpoint.save(file_prefix=checkpoint_prefix)
I have trained the model and use evaluate to check the results when loading after a restart. Each time I get a random result from evaluate, meaning that the model is not loading from the checkpoint but instead only has random weights.
How do I save the model? It can take days to train one of these.
Edit. Here is the model:
class EagerRNN(tfe.Network):
    def __init__(self, embedding, hidden_dim, num_layers, keep_ratio):
        super(EagerRNN, self).__init__()
        self.keep_ratio = keep_ratio
        self.cells = self._add_cells([
            tf.nn.rnn_cell.BasicLSTMCell(num_units=hidden_dim)
            for _ in range(num_layers)
        ])
        self.backcells = self._add_cells([
            tf.nn.rnn_cell.BasicLSTMCell(num_units=hidden_dim)
            for _ in range(num_layers)
        ])
        self.linear = layers.Dense(embedding.vocab_size, kernel_initializer=tf.random_uniform_initializer(-0.1, 0.1))
        self.backlinear = layers.Dense(embedding.vocab_size, kernel_initializer=tf.random_uniform_initializer(-0.1, 0.1))
        self.attension = layers.Dense(hidden_dim, kernel_initializer=tf.random_uniform_initializer(-0.1, 0.1))

    def call(self, input_seq, seq_lengths, training):
        lengths = [i[0] for i in seq_lengths]
        nRotations = max(lengths)
        batchSize = input_seq.shape[0]
        input_seq2 = tf.unstack(input_seq, num=int(input_seq.shape[1]), axis=1)
        atten = None
        state = self.cells[0].zero_state(batchSize, tf.float32)
        for i in range(0, nRotations):
            for j in range(0, len(self.cells)):
                c = self.cells[j]
                inp = input_seq2[i]
                output, state = c(inp, state)
                #input_seq2[i]=(output)
                if atten == None:
                    atten = self.linear(output)
                else:
                    atten = atten + self.linear(output)
        for i in range(nRotations - 1, -1, -1):
            for j in range(0, len(self.backcells)):
                c = self.backcells[j]
                inp = input_seq2[i]
                output, state = c(inp, state)
                #input_seq2[i]=(output)
                atten = atten + self.backlinear(output)
        #input_seq = tf.stack(input_seq2[0:nRotations], axis=1)
        atten = self.attension(atten)
        if training:
            input_seq = tf.nn.dropout(input_seq, self.keep_ratio)
        # Returning a list instead of a single tensor so that the line:
        #   y = self.rnn(y, ...)[0]
        # in PTBModel.call works for both this RNN and CudnnLSTM (which returns a
        # tuple (output, output_states).
        return input_seq, state, atten

    def _add_cells(self, cells):
        # "Magic" required for keras.Model classes to track all the variables in
        # a list of Layer objects.
        # TODO(ashankar): Figure out API so user code doesn't have to do this.
        for i, c in enumerate(cells):
            setattr(self, "cell-%d" % i, c)
        return cells

class EagerLSTM_Model(tfe.Network):
    """LSTM for word language modeling.

    Model described in:
    (Zaremba, et. al.) Recurrent Neural Network Regularization
    http://arxiv.org/abs/1409.2329

    See also:
    https://github.com/tensorflow/models/tree/master/tutorials/rnn/ptb
    """
    def __init__(self,
                 embedding,
                 hidden_dim,
                 num_layers,
                 dropout_ratio,
                 use_cudnn_rnn=True):
        super(EagerLSTM_Model, self).__init__()
        self.keep_ratio = 1 - dropout_ratio
        self.use_cudnn_rnn = use_cudnn_rnn
        self.embedding = embedding
        if self.use_cudnn_rnn:
            self.rnn = cudnn_rnn.CudnnLSTM(
                num_layers, hidden_dim, dropout=dropout_ratio)
        else:
            self.rnn = EagerRNN(embedding, hidden_dim, num_layers, self.keep_ratio)
        self.unrnn = EagerUnRNN(embedding, hidden_dim, num_layers, self.keep_ratio)

    def callRNN(self, input_seq, seq_lengths, training):
        y = self.embedding.callbatchword(input_seq)
        if training:
            y = tf.nn.dropout(y, self.keep_ratio)
        y, state, atten = self.rnn.call(y, seq_lengths, training=training)
        return state, atten

    def callUnRNN(self, state, atten, seq_lengths, training):
        x, state = self.unrnn(state, atten, seq_lengths, training=training)
        #b=tf.reshape(y, self._output_shape)
        #c=self.linear(b)
        return x
tfe.Network is not (easily) Checkpointable and it will soon be deprecated; prefer to subclass tf.keras.Model instead. So if you change class EagerRNN(tfe.Network) to class EagerRNN(tf.keras.Model) and class EagerLSTM_Model(tfe.Network) to class EagerLSTM_Model(tf.keras.Model), checkpoint.save(file_prefix=checkpoint_prefix) should actually save all your variables, and checkpoint.restore(tf.train.latest_checkpoint(checkpoint_directory)) should restore them.
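A rough sketch of that pattern (TinyModel here is a made-up stand-in, not the model from the question); once the model is a tf.keras.Model subclass, tfe.Checkpoint tracks its variables:
import os
import tensorflow as tf
import tensorflow.contrib.eager as tfe

tf.enable_eager_execution()

class TinyModel(tf.keras.Model):  # subclass tf.keras.Model instead of tfe.Network
    def __init__(self):
        super(TinyModel, self).__init__()
        self.dense = tf.keras.layers.Dense(4)

    def call(self, x):
        return self.dense(x)

model = TinyModel()
optimizer = tf.train.AdamOptimizer()
checkpoint = tfe.Checkpoint(model=model, optimizer=optimizer)
checkpoint_directory = './JokeWords/'
checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")

checkpoint.restore(tf.train.latest_checkpoint(checkpoint_directory))  # no-op on the first run
model(tf.zeros([1, 8]))  # run the model once so its variables exist
checkpoint.save(file_prefix=checkpoint_prefix)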

Tensorflow, how to pass MultiRNN state in feed_dict

I am trying to make a generative RNN model in tensorflow. What is annoying me is that with the new switch to state_is_tuple being true by default in the RNN library, I am having a hard time finding the best way to save state between batches. I know I can change it back to being False, but I don't want to do that since it is deprecated. When I am done with the training I need to be able to preserve the hidden states between calls to session.run, since I will be generating the sequences one sample at a time. I figured out that I can return the state of the rnn as follows.
rnn = tf.nn.rnn_cell.MultiRNNCell(cells)
zero_state = rnn.zero_state(batch_size, tf.float32)
output, final_state = tf.nn.dynamic_rnn(rnn, self.input_sound, initial_state = zero_state)
sess = tf.Session()
sess.run(tf.initialize_all_variables())
state_output = sess.run(final_state, feed_dict = {self.input_sound: np.zeros((64, 32, 512))})
This would be great, but the issue emerges when I want to pass state_output back into the model. Since a placeholder can only be a tensor object, I can't pass the state_output tuple back through it.
I am looking for a very generic solution. The rnn could be a MultiRNNCell or a single LSTMCell or any other combination imaginable.
I think I figured it out. I used the following code to flatten the state tuples into a single 1D tensor. I can then chop it up when I pass it back into the model, according to the size specification of the rnn cell.
def flatten_state_tupel(x):
    # Recursively flatten a (possibly nested) state tuple into a flat list.
    result = []
    for x_ in x:
        if isinstance(x_, tf.Tensor) or not hasattr(x_, '__iter__'):
            result.append(x_)
        else:
            result.extend(flatten_state_tupel(x_))
    return result

def pack_state_tupel(state):
    # Concatenate every state tensor into a single 1D tensor.
    return tf.concat(0, [tf.reshape(s, (-1,)) for s in flatten_state_tupel(state)])

def unpack_state_tupel(state, size):
    # Rebuild the nested state structure from the flat tensor, slicing it
    # according to the cell's state_size specification.
    state = tf.reshape(state, (-1, tf.reduce_sum(flatten_state_tupel(size))))
    def _make_state_tupel(sz, i):
        if hasattr(sz, '__iter__'):
            result = []
            for s in sz:
                i, y = _make_state_tupel(s, i)
                result.append(y)
            return i, tf.nn.rnn_cell.LSTMStateTuple(*result) if isinstance(sz, tf.nn.rnn_cell.LSTMStateTuple) else tuple(result)
        else:
            return i + sz, state[..., i : i + sz]
    return _make_state_tupel(size, 0)[-1]
I use the functions as follows.
rnn = tf.nn.rnn_cell.MultiRNNCell(cells)
zero_state = pack_state_tupel(rnn.zero_state(batch_size, tf.float32))
self.initial_state = tf.placeholder_with_default(zero_state, None)
output, final_state = tf.nn.dynamic_rnn(rnn, self.input_sound, initial_state = unpack_state_tupel(self.initial_state, rnn.state_size))
packed_state = pack_state_tupel(final_state)
sess = tf.Session()
sess.run(tf.initialize_all_variables())
state_output = sess.run(packed_state, feed_dict = {self.input_sound: np.zeros((64, 32, 512))})
print(state_output.shape)
state_output = sess.run(packed_state, feed_dict = {self.input_sound: np.zeros((64, 32, 512)), self.initial_state: np.zeros(state_output.shape[0])})
print(state_output)
This way it will zero the state if I do not pass anything (which will be the case during training); however, I can save and pass the state between batches during generation.
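During generation the usage then looks roughly like the following (num_steps and next_input stand in for whatever the generation loop produces; this is just a sketch of carrying the packed state across session.run calls):
state = None
for step in range(num_steps):
    feed = {self.input_sound: next_input}
    if state is not None:
        feed[self.initial_state] = state  # feed the previous packed state back in
    out, state = sess.run([output, packed_state], feed_dict=feed)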