Am I using tf.get_variable() correctly? - tensorflow

I read from here that it is recommended to always use tf.get_variable(...) although this seems a bit troublesome when I'm trying to implement a network.
For example:
def create_weights(shape, name='weights',
                   initializer=tf.random_normal_initializer(0, 0.1)):
    weights = tf.get_variable(name, shape, initializer=initializer)
    print("weights created named: {}".format(weights.name))
    return weights
def LeNet(in_units, keep_prob):
    # define the network
    with tf.variable_scope("conv1"):
        conv1 = conv(in_units, create_weights([5, 5, 3, 32]), create_bias([32]))
        pool1 = maxpool(conv1)
    with tf.variable_scope("conv2"):
        conv2 = conv(pool1, create_weights([5, 5, 32, 64]), create_bias([64]))
        pool2 = maxpool(conv2)
    # reshape the network to feed it into the fully connected layers
    with tf.variable_scope("flatten"):
        flatten = tf.reshape(pool2, [-1, 1600])
        flatten = dropout(flatten, keep_prob)
    with tf.variable_scope("fc1"):
        fc1 = fc(flatten, create_weights([1600, 120]), biases=create_bias([120]))
        fc1 = dropout(fc1, keep_prob)
    with tf.variable_scope("fc2"):
        fc2 = fc(fc1, create_weights([120, 84]), biases=create_bias([84]))
    with tf.variable_scope("logits"):
        logits = fc(fc2, create_weights([84, 43]), biases=create_bias([43]))
    return logits
I have to wrap every single call to create_weights in a with tf.variable_scope(...) block, and furthermore, if I wanted to change the conv1 weights to [7, 7, 3, 32] instead of [5, 5, 3, 32] I would have to restart the kernel, since the variable already exists. On the other hand, if I used tf.Variable(...) I wouldn't have any of these problems.
Am I using tf.variable_scope(...) incorrectly?

It seems that you cannot change what already exists in a variable scope, so only after restarting the kernel can you change a variable that you defined before (in fact you are creating a new one, because the previous one has been deleted).
...
That is only my guess; I would appreciate it if someone could give a detailed answer.
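For reference, here is a minimal sketch (TF 1.x API, not from the original post) of two common workarounds: resetting the default graph so the stale variable disappears without restarting the kernel, and opening the scope with reuse=tf.AUTO_REUSE so re-running an unchanged definition does not raise an "already exists" error.
import tensorflow as tf

# Workaround 1: discard the previous graph, e.g. in a notebook, so the old
# "conv1/weights" variable is gone and can be re-created with a new shape
# such as [7, 7, 3, 32].
tf.reset_default_graph()

# Workaround 2: AUTO_REUSE creates the variable on the first call and reuses
# it on later calls (this only helps while the shape stays the same).
with tf.variable_scope("conv1", reuse=tf.AUTO_REUSE):
    weights = tf.get_variable("weights", [7, 7, 3, 32],
                              initializer=tf.random_normal_initializer(0, 0.1))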

Related

How to backprop through a model that predicts the weights for another in Tensorflow

I am currently trying to train a model (hypernetwork) that can predict the weights for another model (main network) such that the main network's cross-entropy loss decreases. However, when I use tf.assign to assign the new weights to the network, it does not allow backpropagation into the hypernetwork, rendering the system non-differentiable. I have tested whether my weights are properly updated, and they seem to be, since the sum of the differences between the updated and initial weights is non-zero.
This is a minimal sample of what I am trying to achieve.
import numpy as np
import tensorflow as tf
from tensorflow.contrib.layers import softmax

def random_addition(variables):
    addition_update_ops = []
    for variable in variables:
        update = tf.assign(variable, variable + tf.random_normal(shape=variable.get_shape()))
        addition_update_ops.append(update)
    return addition_update_ops

def network_predicted_addition(variables, network_preds):
    addition_update_ops = []
    for idx, variable in enumerate(variables):
        if idx == 0:
            print(variable)
            update = tf.assign(variable, variable + network_preds[idx])
            addition_update_ops.append(update)
    return addition_update_ops

def dense_weight_update_net(inputs, reuse):
    with tf.variable_scope("weight_net", reuse=reuse):
        output = tf.layers.conv2d(inputs=inputs, kernel_size=(3, 3), filters=16, strides=(1, 1),
                                  activation=tf.nn.leaky_relu, name="conv_layer_0", padding="SAME")
        output = tf.reduce_mean(output, axis=[0, 1, 2])
        output = tf.reshape(output, shape=(1, output.get_shape()[0]))
        output = tf.layers.dense(output, units=(16 * 3 * 3 * 3))
        output = tf.reshape(output, shape=(3, 3, 3, 16))
    return output

def conv_net(inputs, reuse):
    with tf.variable_scope("conv_net", reuse=reuse):
        output = tf.layers.conv2d(inputs=inputs, kernel_size=(3, 3), filters=16, strides=(1, 1),
                                  activation=tf.nn.leaky_relu, name="conv_layer_0", padding="SAME")
        output = tf.reduce_mean(output, axis=[1, 2])
        output = tf.layers.dense(output, units=2)
        output = softmax(output)
    return output

input_x_0 = tf.zeros(shape=(32, 32, 32, 3))
target_y_0 = tf.zeros(shape=(32), dtype=tf.int32)
input_x_1 = tf.ones(shape=(32, 32, 32, 3))
target_y_1 = tf.ones(shape=(32), dtype=tf.int32)

input_x = tf.concat([input_x_0, input_x_1], axis=0)
target_y = tf.concat([target_y_0, target_y_1], axis=0)

output_0 = conv_net(inputs=input_x, reuse=False)
target_y = tf.one_hot(target_y, 2)
crossentropy_loss_0 = tf.losses.softmax_cross_entropy(onehot_labels=target_y, logits=output_0)

conv_net_parameters = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="conv_net")
weight_net_parameters = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="weight_net")
print(conv_net_parameters)

weight_updates = dense_weight_update_net(inputs=input_x, reuse=False)
#updates_0 = random_addition(conv_net_parameters)
updates_1 = network_predicted_addition(conv_net_parameters, network_preds=[weight_updates])

with tf.control_dependencies(updates_1):
    output_1 = conv_net(inputs=input_x, reuse=True)
    crossentropy_loss_1 = tf.losses.softmax_cross_entropy(onehot_labels=target_y, logits=output_1)
    check_sum = tf.reduce_sum(tf.abs(output_0 - output_1))

c_opt = tf.train.AdamOptimizer(beta1=0.9, learning_rate=0.001)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)  # Needed for correct batch norm usage
with tf.control_dependencies(update_ops):  # Needed for correct batch norm usage
    train_variables = weight_net_parameters  # + conv_net_parameters
    c_error_opt_op = c_opt.minimize(crossentropy_loss_1,
                                    var_list=train_variables,
                                    colocate_gradients_with_ops=True)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    init = sess.run(init)
    loss_list_0 = []
    loss_list_1 = []
    for i in range(1000):
        _, checksum, crossentropy_0, crossentropy_1 = sess.run([c_error_opt_op, check_sum, crossentropy_loss_0,
                                                                crossentropy_loss_1])
        loss_list_0.append(crossentropy_0)
        loss_list_1.append(crossentropy_1)
        print(checksum, np.mean(loss_list_0), np.mean(loss_list_1))
Does anyone know how I can get tensorflow to compute the gradients for this? Thank you.
In this case your weights aren't variables, they are computed tensors based on the hypernetwork. All you really have is one network during training. If I understand you correctly you are then proposing to discard the hypernetwork and be able to use just the main network to perform predictions.
If that is the case, you can either save the weight values manually and reload them as constants, or you can keep assigning them with tf.assign during training, as you are doing, and add a tf.cond that chooses between the variable and the computed tensor depending on whether you're doing training or inference.
During training you will need to use the computed tensor from the hypernetwork in order to enable backprop.
Example from the comments: w is the weight you'll use. You can assign a variable during training to keep track of it, but then use tf.cond to either use the variable (during inference) or the computed value from the hypernetwork (during training). In this example you need to pass in a boolean placeholder is_training_placeholder to indicate whether you're running training or inference.
tf.assign(w_variable, w_from_hypernetwork)
w = tf.cond(is_training_placeholder, true_fn=lambda: w_from_hypernetwork, false_fn=lambda: w_variable)
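A slightly fuller sketch of that pattern (shapes and names such as w_from_hypernetwork and w_stored are illustrative only, not taken from the code above):
import tensorflow as tf

# Stand-in for the tensor the hypernetwork actually produces for this weight.
w_from_hypernetwork = tf.random_normal(shape=(3, 3, 3, 16))
# Non-trainable variable that stores a snapshot of the predicted weights so
# they can be saved/restored and used on their own at inference time.
w_variable = tf.get_variable("w_stored", shape=(3, 3, 3, 16), trainable=False)
is_training_placeholder = tf.placeholder(tf.bool, shape=(), name="is_training")

update_w = tf.assign(w_variable, w_from_hypernetwork)

# Training: flow through the computed tensor so gradients reach the
# hypernetwork. Inference: read the stored variable instead.
w = tf.cond(is_training_placeholder,
            true_fn=lambda: w_from_hypernetwork,
            false_fn=lambda: w_variable)
During training you would run update_w (or put it in a control dependency) so the stored variable stays in sync, and at inference time you would feed is_training_placeholder as False.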

How to share weights using tf.layers.conv2d

I have built an autoencoder using tf.layers.conv2d layers and would like to train it in phases, that is, to train the outer layers first, then the middle layers, and then the inner ones. I understand this is possible using tf.nn.conv2d, because the weights are declared using tf.get_variable, but I would think this should also be possible using tf.layers.conv2d.
If I enter a new variable scope different from the original graph to change the inputs to the convolutional layers (i.e. skip the inner layers during phase 1), I am not able to reuse the weights. If I do not enter a new variable scope, I am not able to freeze the weights that I don't want to train in this phase.
Basically I am trying to use the training method from Aurélien Géron here https://github.com/ageron/handson-ml/blob/master/15_autoencoders.ipynb
except I would like to use a CNN instead of dense layers. How can I do it?
No need to create the variables by hand. This works just as well:
import tensorflow as tf

inputs_1 = tf.placeholder(tf.float32, (None, 512, 512, 3), name='inputs_1')
inputs_2 = tf.placeholder(tf.float32, (None, 512, 512, 3), name='inputs_2')

with tf.variable_scope('conv'):
    out_1 = tf.layers.conv2d(inputs_1, 32, [3, 3], name='conv_1')
with tf.variable_scope('conv', reuse=True):
    out_2 = tf.layers.conv2d(inputs_2, 32, [3, 3], name='conv_1')

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    print(tf.trainable_variables())
If you give tf.layers.conv2d the same name, it will use the same weights (assuming reuse=True, otherwise there will be a ValueError).
In TensorFlow 2.0, tf.layers was replaced by Keras layers, where the variables are reused by using the same layer object:
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, 3, activation='relu',
                           input_shape=(512, 512, 3)),
])

@tf.function
def f1(x):
    return model(x)

@tf.function
def f2(x):
    return model(x)
Both f1 and f2 will use the layer with the same variables.
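A quick check of the sharing (a sketch assuming the model and functions defined above):
import numpy as np
import tensorflow as tf

x = tf.random.normal((1, 512, 512, 3))
y1 = f1(x)
y2 = f2(x)

# Both calls go through the same layer object, so the outputs match and the
# model owns a single set of kernel/bias variables.
assert np.allclose(y1.numpy(), y2.numpy())
print([v.name for v in model.trainable_variables])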
I'd recommend setting it up a little differently. Instead of using tf.layers.conv2d, I would explicitly create the weights with tf.get_variable() and then use them in calls to tf.nn.conv2d(). That way the variable creation isn't a black box and you can reference the weights easily. It's also a good way to learn exactly what's going on in your network, since you wrote the shape of every set of weights by hand!
Sample (untested) code:
inputs = tf.placeholder(tf.float32, (batch_size, 512, 512, 3), name='inputs')
weights = tf.get_variable(name='weights', shape=[5, 5, 3, 16], dtype=tf.float32)

with tf.variable_scope("convs"):
    hidden_layer_1 = tf.nn.conv2d(input=inputs, filter=weights, strides=[1, 1, 1, 1], padding="SAME")
with tf.variable_scope("convs", reuse=True):
    hidden_layer_2 = tf.nn.conv2d(input=hidden_layer_1, filter=weights, strides=[1, 1, 1, 1], padding="SAME")
This creates one set of convolutional weights and applies them twice. I haven't tested this code, so there may be bugs, but this is roughly how it should look. References here for variable sharing and here for tf.nn.conv2d.
Hopefully that helps! I would be more thorough, but I have no idea what your code looks like.

Why, if we use tf.make_template() in the training stage, must we use tf.make_template() again in the testing stage?

I defined a model function named "drrn_model". While training my model, I use it like this:
shared_model = tf.make_template('shared_model', drrn_model)
train_output = shared_model(train_input, is_training=True)
It begins training step by step, and I can restore a .ckpt file into the model when I want to continue training from an old checkpoint.
But there is a problem when I test my trained model.
I use the code below directly without using tf.make_template:
train_output = drrn_model(train_input, is_training=False)
Then the terminal gave me a lot of NotFoundError messages like "Key LastLayer/Variable_2 not found in checkpoint".
But when I use
shared_model = tf.make_template('shared_model', drrn_model)
output_tensor = shared_model(input_tensor,is_training=False)
it tests normally.
So why must we use tf.make_template() again in the testing stage? What is the difference between drrn_model and the template when we construct our model?
And there is another question about the BN layer in TensorFlow.
I have tried many ways, but the output is always wrong (always worse than the version without the BN layer).
Here is my newest version of the model with the BN layer:
tensor = None

def drrn_model(input_tensor, is_training):
    with tf.device("/gpu:0"):
        with tf.variable_scope("FirstLayer"):
            conv_0_w = tf.get_variable("conv_w", [3, 3, 1, 128],
                                       initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0 / 9)))
            tensor = tf.nn.conv2d(tf.nn.relu(batchnorm(input_tensor, is_training=is_training)),
                                  conv_0_w, strides=[1, 1, 1, 1], padding="SAME")
            first_layer = tensor

        ### recursion ###
        with tf.variable_scope("recycle", reuse=False):
            tensor = drrnblock(first_layer, tensor, is_training)
        for i in range(1, 10):
            with tf.variable_scope("recycle", reuse=True):
                tensor = drrnblock(first_layer, tensor, is_training)

        ### end layer ###
        with tf.variable_scope("LastLayer"):
            conv_end_w = tf.get_variable("conv_w", [3, 3, 128, 1],
                                         initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0 / 9)))
            conv_end_layer = tf.nn.conv2d(tf.nn.relu(batchnorm(tensor, is_training=is_training)),
                                          conv_end_w, strides=[1, 1, 1, 1], padding='SAME')

        tensor = tf.add(input_tensor, conv_end_layer)
    return tensor

def drrnblock(first_layer, input_layer, is_training):
    conv1_w = tf.get_variable("conv1__w", [3, 3, 128, 128],
                              initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0 / 9)))
    conv1_layer = tf.nn.conv2d(tf.nn.relu(batchnorm(input_layer, is_training=is_training)),
                               conv1_w, strides=[1, 1, 1, 1], padding="SAME")

    conv2_w = tf.get_variable("conv2__w", [3, 3, 128, 128],
                              initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0 / 9)))
    conv2_layer = tf.nn.conv2d(tf.nn.relu(batchnorm(conv1_layer, is_training=is_training)),
                               conv2_w, strides=[1, 1, 1, 1], padding="SAME")

    tensor = tf.add(first_layer, conv2_layer)
    return tensor

def batchnorm(inputs, is_training, decay=0.999):  # there is my BN layer
    scale = tf.Variable(tf.ones([inputs.get_shape()[-1]]))
    beta = tf.Variable(tf.zeros([inputs.get_shape()[-1]]))
    pop_mean = tf.Variable(tf.zeros([inputs.get_shape()[-1]]), trainable=False)
    pop_var = tf.Variable(tf.ones([inputs.get_shape()[-1]]), trainable=False)

    if is_training:
        batch_mean, batch_var = tf.nn.moments(inputs, [0, 1, 2])
        print("batch_mean.shape: ", batch_mean.shape)
        train_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay))
        train_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay))
        with tf.control_dependencies([train_mean, train_var]):
            return tf.nn.batch_normalization(inputs, batch_mean, batch_var, beta, scale, variance_epsilon=1e-3)
    else:
        return tf.nn.batch_normalization(inputs, pop_mean, pop_var, beta, scale, variance_epsilon=1e-3)
Please tell me what is wrong in my code.
Thanks a lot!!

TensorFlow variables generate an error in a loop

I have a similar problem to TensorFlow: varscope.reuse_variables().
I am doing cross-validation on a dataset.
Each time, I call a function (e.g. myFunctionInFile1()) with new data (due to limited space, I am omitting the data-assignment details). This function is not in the same Python file, so I import it from that file into my main Python file (file2). This function creates a complete CNN and trains and tests a model on the given training and testing data with newly initialized and trained parameters.
From the main file (file2), myFunctionInFile1 is called on the first validation fold; the CNN model is trained and tested and the results are returned to the main file (file2). However, in the second iteration with new data, the following code:
def myFunctionInFile1():
    # Nodes for the input variables
    x = tf.placeholder("float", shape=[None, D], name='Input_data')
    y_ = tf.placeholder(tf.int64, shape=[None], name='Ground_truth')
    keep_prob = tf.placeholder("float")
    bn_train = tf.placeholder(tf.bool)  # Boolean value to guide batchnorm

    def bias_variable(shape, name):
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial, name=name)

    def conv2d(x, W):
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

    def max_pool_2x2(x):
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1], padding='SAME')

    with tf.name_scope("Reshaping_data") as scope:
        x_image = tf.reshape(x, [-1, D, 1, 1])

    initializer = tf.contrib.layers.xavier_initializer()

    """Build the graph"""
    # ewma is the decay for which we update the moving average of the
    # mean and variance in the batch-norm layers
    with tf.name_scope("Conv1") as scope:
        # reuse = tf.AUTO_REUSE
        W_conv1 = tf.get_variable("Conv_Layer_1", shape=[5, 1, 1, num_filt_1], initializer=initializer)
        b_conv1 = bias_variable([num_filt_1], 'bias_for_Conv_Layer_1')
        a_conv1 = conv2d(x_image, W_conv1) + b_conv1

    with tf.name_scope('Batch_norm_conv1') as scope:
        a_conv1 = tf.contrib.layers.batch_norm(a_conv1, is_training=bn_train, updates_collections=None)
gives me the following error:
ValueError: Variable BatchNorm_2/beta does not exist, or was not created with tf.get_variable(). Did you mean to set reuse=tf.AUTO_REUSE in VarScope?
What is the problem? Generally, in C/C++/Java, when you exit a function its local variables are deleted automatically on return, and each new call should create a new set of parameters. So why does it give this error, and how can I fix it?
TensorFlow layers like batch_norm are implemented using tf.get_variable. tf.get_variable respects the reuse setting of the enclosing variable_scope, which defaults to False; with reuse=False it always creates variables, and with reuse=True it reuses existing variables or fails if they do not exist.
In your case you're calling batch norm with reuse=True the first time around, so there are no existing variables for it to reuse. Try setting reuse=False in your variable scope or, as the error message suggests, using tf.AUTO_REUSE.
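A minimal sketch of the AUTO_REUSE route (the scope name and shapes here are illustrative, not taken from the question): wrap the graph-building code in a variable scope so the second cross-validation fold reuses the variables created on the first instead of erroring out.
import tensorflow as tf

def build_model():
    # With reuse=tf.AUTO_REUSE the first call creates the variables
    # (including batch_norm's beta/gamma and moving statistics) and later
    # calls reuse them instead of raising a ValueError.
    with tf.variable_scope("cnn_model", reuse=tf.AUTO_REUSE):
        x = tf.placeholder(tf.float32, shape=[None, 8, 1, 1], name="x")
        bn_train = tf.placeholder(tf.bool, name="bn_train")
        w = tf.get_variable("Conv_Layer_1", shape=[5, 1, 1, 16],
                            initializer=tf.contrib.layers.xavier_initializer())
        a = tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding="SAME")
        a = tf.contrib.layers.batch_norm(a, is_training=bn_train,
                                         updates_collections=None)
    return a

out_fold_1 = build_model()  # creates the variables
out_fold_2 = build_model()  # reuses them, no error
If each fold should instead start from freshly initialized parameters, another option is to call tf.reset_default_graph() before rebuilding the model for the next fold.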

How do I use TensorBoard with tf.layers?

As the weights are not explicitly defined, how can I pass them to a summary writer?
For example:
conv1 = tf.layers.conv2d(
    tf.reshape(X, [FLAGS.batch, 3, 160, 320]),
    filters=16,
    kernel_size=(8, 8),
    strides=(4, 4),
    padding='same',
    kernel_initializer=tf.contrib.layers.xavier_initializer(),
    bias_initializer=tf.zeros_initializer(),
    kernel_regularizer=None,
    name='conv1',
    activation=tf.nn.elu
)
=>
summarize_tensor(
    ??????
)
Thanks!
While Da Tong's answer is complete, it took me a while to realize how to use it. To save time for another beginner: you need to add the following to your code to add all trainable variables to the TensorBoard summary:
for var in tf.trainable_variables():
    tf.summary.histogram(var.name, var)
merged_summary = tf.summary.merge_all()
That depends on what you are going to record in TensorBoard. If you want to put every variable into TensorBoard, calling tf.all_variables() or tf.trainable_variables() will give you all the variables. Note that tf.layers.conv2d is just a wrapper that creates a Conv2D instance and calls its apply method. You can unwrap it like this:
conv1_layer = tf.layers.Conv2D(
    filters=16,
    kernel_size=(8, 8),
    strides=(4, 4),
    padding='same',
    kernel_initializer=tf.contrib.layers.xavier_initializer(),
    bias_initializer=tf.zeros_initializer(),
    kernel_regularizer=None,
    name='conv1',
    activation=tf.nn.elu
)
conv1 = conv1_layer.apply(tf.reshape(X, [FLAGS.batch, 3, 160, 320]))
Then you can use conv1_layer.kernel to access the kernel weights.
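Putting the two answers together, here is a short sketch (assuming the conv1_layer defined above; the log directory and feed are illustrative) of recording the layer's weights in TensorBoard:
# Once the layer has been applied, it exposes its variables as attributes.
tf.summary.histogram('conv1/kernel', conv1_layer.kernel)
tf.summary.histogram('conv1/bias', conv1_layer.bias)
merged_summary = tf.summary.merge_all()

with tf.Session() as sess:
    writer = tf.summary.FileWriter('./logs', sess.graph)
    sess.run(tf.global_variables_initializer())
    # summary = sess.run(merged_summary, feed_dict={X: batch})  # feed your inputs
    # writer.add_summary(summary, global_step=0)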