Training custom-made CNN model in eager execution programming environment - tensorflow

I have built a CNN model using the principle of "Model Subclassing" in Keras. Here is the class that represents my model:
class ConvNet(tf.keras.Model):
    def __init__(self, data_format, classes):
        super(ConvNet, self).__init__()
        if data_format == "channels_first":
            axis = 1
        elif data_format == "channels_last":
            axis = -1
        self.conv_layer1 = tf.keras.layers.Conv2D(filters=32, kernel_size=3, strides=(1, 1),
                                                  padding="same", activation="relu")
        self.pool_layer1 = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2))
        self.conv_layer2 = tf.keras.layers.Conv2D(filters=64, kernel_size=3, strides=(1, 1),
                                                  padding="same", activation="relu")
        self.pool_layer2 = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2))
        self.conv_layer3 = tf.keras.layers.Conv2D(filters=128, kernel_size=5, strides=(1, 1),
                                                  padding="same", activation="relu")
        self.pool_layer3 = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(1, 1),
                                                        padding="same")
        self.flatten = tf.keras.layers.Flatten()
        self.dense_layer1 = tf.keras.layers.Dense(units=512, activation="relu")
        self.dense_layer2 = tf.keras.layers.Dense(units=classes, activation="softmax")

    def call(self, inputs, training=True):
        output_tensor = self.conv_layer1(inputs)
        output_tensor = self.pool_layer1(output_tensor)
        output_tensor = self.conv_layer2(output_tensor)
        output_tensor = self.pool_layer2(output_tensor)
        output_tensor = self.conv_layer3(output_tensor)
        output_tensor = self.pool_layer3(output_tensor)
        output_tensor = self.flatten(output_tensor)
        output_tensor = self.dense_layer1(output_tensor)
        return self.dense_layer2(output_tensor)
I would like to know how to train it "eagerly", and by that I mean avoiding the use of compile and fit methods.
I am not sure exactly how to construct the training loop. I understand that I must call tf.GradientTape.gradient() to calculate the gradients and then use optimizer.apply_gradients() to update my model parameters.
What I do not understand is how I can make predictions with my model to get logits and then use them to calculate the loss. If someone could help me with the idea of how to construct the training loop, I would really appreciate it.

Eager execution is the imperative programming mode that lets developers follow Python's natural control flow. Essentially, you no longer need to first create placeholders and computational graphs and then execute them in TensorFlow sessions. You can use automatic differentiation to compute gradients in your training loop:
for i in range(iterations):
    with tf.GradientTape() as tape:
        logits = model(batch_examples, training=True)
        loss = tf.losses.sparse_softmax_cross_entropy(batch_labels, logits)
    grads = tape.gradient(loss, model.trainable_variables)
    opt.apply_gradients(zip(grads, model.trainable_variables))
This assumes that the model is an instance of the Keras Model class. Note that apply_gradients expects an iterable of (gradient, variable) pairs, which is why the gradients and variables are zipped together. I hope this solves your problem! You should also check out the TensorFlow Guide on Eager Execution.
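For completeness, here is a minimal end-to-end sketch of how the pieces fit together, under these assumptions: eager execution is enabled (tf.enable_eager_execution() in TF 1.x; it is the default in TF 2.x), `dataset` is a tf.data.Dataset of (image, label) batches, and the softmax activation has been dropped from dense_layer2 so the model returns raw logits, which is what tf.losses.sparse_softmax_cross_entropy expects:

model = ConvNet(data_format="channels_last", classes=10)
opt = tf.train.AdamOptimizer(learning_rate=1e-3)

for batch_examples, batch_labels in dataset:
    with tf.GradientTape() as tape:
        # forward pass: calling the model runs call() and returns the last layer's output
        logits = model(batch_examples, training=True)
        loss = tf.losses.sparse_softmax_cross_entropy(batch_labels, logits)
    # gradients of the loss w.r.t. every trainable parameter of the model
    grads = tape.gradient(loss, model.trainable_variables)
    # apply_gradients takes (gradient, variable) pairs
    opt.apply_gradients(zip(grads, model.trainable_variables))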

Related

Combine keras model horizontally, one of them pretrained

I have two functional Keras models at the same level (same input and output shape); one of them is pre-trained. I would like to combine them horizontally and then retrain the whole model. I mean I want to initialize the pretrained one with its weights and the other one randomly. How can I combine them horizontally by adding them as branches (not concatenating)? A sketch of the intended merge step is shown after the code below.
def define_model_a(input_shape, initializer, outputs=1):
    input_layer = Input(shape=(input_shape))
    # first path
    path10 = input_layer
    path11 = Conv1D(filters=1, kernel_size=3, strides=1, padding="same", use_bias=True,
                    kernel_initializer=initializer)(path10)
    path12 = Lambda(lambda x: abs(x))(path11)
    output = Add()([path10, path12])
    define_model_a = Model(inputs=input_layer, outputs=output)
    define_model_a._name = 'model_a'
    return define_model_a

def define_model_b(input_shape, initializer, outputs=1):
    input_layer = Input(shape=(input_shape))
    # first path
    path10 = input_layer
    path11 = Conv1D(filters=1, kernel_size=3, strides=1, padding="same", use_bias=True,
                    kernel_initializer=initializer)(path10)
    path12 = ReLU()(path11)
    path13 = Dense(1, use_bias=True)(path12)
    output = path13
    define_model_b = Model(inputs=input_layer, outputs=output)
    define_model_b._name = 'model_b'
    return define_model_b

def define_merge_interpretation():
    ????
    ????
    output = Add()(model_a, model_b)
    model = Model(inputs=input_layer, outputs=output)
    return model

initializer = tf.keras.initializers.HeNormal()
model_a = define_model_a(input_shape, initializer, outputs=1)
model_b = define_model_b(input_shape, initializer, outputs=1)
model_a.load_weights(load_path)
merge_interpretation = def merge_interprettation( )
history = merge_interpretation.fit(......
As a reference, I am looking for a final structure like the one shown in the image, but with some pretrained branches.
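A minimal sketch of the merge step being asked about might look like the following; the helper signature and the shared-input wiring are assumptions, not from the question, and the compile settings are purely illustrative:

from tensorflow.keras.layers import Input, Add
from tensorflow.keras.models import Model

def define_merge_interpretation(model_a, model_b, input_shape):
    # one shared input feeds both branches
    input_layer = Input(shape=input_shape)
    out_a = model_a(input_layer)    # pretrained branch (weights already loaded)
    out_b = model_b(input_layer)    # randomly initialized branch
    output = Add()([out_a, out_b])  # element-wise addition, not concatenation
    return Model(inputs=input_layer, outputs=output)

merged = define_merge_interpretation(model_a, model_b, input_shape)
merged.compile(optimizer="adam", loss="mse")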

Input layer 0 of sequence is incompatible with the layer - CNNs

I am trying to create a CNN model with hyperparameter tuning for image classification. When I run the code I receive the following error:
ValueError: Input 0 of layer "sequential" is incompatible with the layer: expected shape=(None, 32, 32, 32, 3), found shape=(32, 32, 32, 3)
How do I fix the error? Here is the whole code:
# first we create our actual code which requires the arguments, units, activation, dropout, lr:
def build_model(hp):
    model = ks.Sequential([
        # adding first conv2d layer
        ks.layers.Conv2D(
            # Let's tune the filters, kernel_size, activation function.
            filters=hp.Int("conv_1_filter", min_value=1, max_value=100, step=16),
            kernel_size=hp.Choice("conv_1_kernel", values=[3, 5]),
            activation=hp.Choice("conv_1_activation", ["relu", "tanh", "softmax"]),
            input_shape=(32, 32, 32, 3)
        ),
        # adding second conv2d layer
        ks.layers.Conv2D(
            # Let's tune the filters, kernel_size, activation function.
            filters=hp.Int("conv_2_filter", min_value=1, max_value=50, step=16),
            kernel_size=hp.Choice("conv_2_kernel", values=[3, 5]),
            activation=hp.Choice("conv_2_activation", ["relu", "tanh", "softmax"]),
            input_shape=(32, 32, 32, 3)
        )])
    model.add(layers.Flatten())
    # Let's tune the number of Dense layers.
    for i in range(hp.Int("num_dense_layers", 1, 3)):
        model.add(
            layers.Dense(
                # Let's tune the number of units separately
                units=hp.Int(f"units_{i}", min_value=1, max_value=100, step=16),
                activation=hp.Choice("activation", ["relu", "tanh", "softmax"])
            ))
    if hp.Boolean("dropout"):
        model.add(layers.Dropout(rate=0.25))
    model.add(layers.Dense(10, activation="softmax"))
    learning_rate = hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log")
    model.compile(
        optimizer=ks.optimizers.Adam(learning_rate=learning_rate),
        loss="categorical_crossentropy",
        metrics=["accuracy"]
    )
    return model

build_model(keras_tuner.HyperParameters())
You are getting this error due to an input shape mismatch.
Here I have implemented the hypermodel on the Fashion-MNIST dataset, which contains images of shape (28, 28, 1).
def build_model(hp):
    model = tf.keras.Sequential([
        tf.keras.Input(shape=(28, 28, 1)),
        # adding first conv2d layer
        tf.keras.layers.Conv2D(
            # Let's tune the filters, kernel_size, activation function.
            filters=hp.Int("conv_1_filter", min_value=1, max_value=100, step=16),
            kernel_size=hp.Choice("conv_1_kernel", values=[3, 5]),
            activation=hp.Choice("conv_1_activation", ["relu", "tanh", "softmax"]),
        ),
        tf.keras.layers.MaxPooling2D(
            pool_size=hp.Choice('pooling_1', values=[2, 3])),
        # adding second conv2d layer
        tf.keras.layers.Conv2D(
            # Let's tune the filters, kernel_size, activation function.
            filters=hp.Int("conv_2_filter", min_value=1, max_value=50, step=16),
            kernel_size=hp.Choice("conv_2_kernel", values=[3, 5]),
            activation=hp.Choice("conv_2_activation", ["relu", "tanh", "softmax"]),
        ),
        tf.keras.layers.MaxPooling2D(
            pool_size=hp.Choice('pooling_2', values=[2, 3]))])
    model.add(tf.keras.layers.Flatten())
    if hp.Boolean("dropout"):
        model.add(tf.keras.layers.Dropout(rate=0.25))
    model.add(tf.keras.layers.Dense(10, activation="softmax"))
    learning_rate = hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log")
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss="categorical_crossentropy",
        metrics=["accuracy"]
    )
    return model
By providing the correct shape, you will not get any error.
For more details, please refer to this gist and this documentation. Thank you!
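Applied back to the question's own code, a minimal sketch of the same fix might look like this, assuming the images are 32x32 RGB (e.g. CIFAR-10) and using the question's ks alias for keras; the per-sample shape is (32, 32, 3) and the batch dimension stays implicit. The hyperparameter ranges here are illustrative:

def build_model(hp):
    model = ks.Sequential([
        ks.Input(shape=(32, 32, 3)),  # per-sample shape; batch dim is the implicit None
        ks.layers.Conv2D(
            filters=hp.Int("conv_1_filter", min_value=16, max_value=96, step=16),
            kernel_size=hp.Choice("conv_1_kernel", values=[3, 5]),
            activation="relu",
        ),
        ks.layers.Flatten(),
        ks.layers.Dense(10, activation="softmax"),
    ])
    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
    return model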

How to freeze/unfreeze a pretrained Model as part of a subclassed Model in Tensorflow?

I am trying to build a subclassed Model which consists of a pretrained convolutional base and some Dense layers on top, using Tensorflow >= 2.4.
However, freezing/unfreezing the subclassed Model has no effect once it has been trained. When I do the same with the Functional API, everything works as expected. I would really appreciate a hint as to what I'm missing here. The following code should specify my problem further. Pardon the amount of code:
# Setup
import tensorflow as tf
tf.config.run_functions_eagerly(False)
import numpy as np
from tensorflow.keras.regularizers import l1
import matplotlib.pyplot as plt

@tf.function
def create_images_and_labels(img, label, height=70, width=70):  # Image augmentation
    label = tf.cast(label, 'float32')
    label = tf.squeeze(label)
    img = tf.image.convert_image_dtype(img, tf.float32)
    img = tf.image.resize(img, (height, width))
    # img = preprocess_input(img)
    return img, label

cifar = tf.keras.datasets.cifar10
(x_train, y_train), (x_test, y_test) = cifar.load_data()
num_classes = len(np.unique(y_train))

ds_train = tf.data.Dataset.from_tensor_slices((x_train, tf.one_hot(y_train, depth=len(np.unique(y_train)))))
ds_train = ds_train.map(lambda img, label: create_images_and_labels(img, label, height=70, width=70))
ds_train = ds_train.shuffle(50000)
ds_train = ds_train.batch(50, drop_remainder=True)

ds_val = tf.data.Dataset.from_tensor_slices((x_test, tf.one_hot(y_test, depth=len(np.unique(y_train)))))
ds_val = ds_val.map(lambda img, label: create_images_and_labels(img, label, height=70, width=70))
ds_val = ds_val.batch(50, drop_remainder=True)

# for i in ds_train.take(1):
#     x, y = i
#     for ind in range(x.shape[0]):
#         plt.imshow(x[ind,:,:])
#         plt.show()
#         print(y[ind])
'''
Defining a simple subclassed Model consisting of
VGG16
Flatten
Dense Layers

customized what happens in model.fit and model.evaluate (actually it's the standard Keras procedure with custom Metrics)
customized metrics: Loss and Accuracy for Training and Validation Step
added unfreezing Method
'set_trainable_layers'
Arguments:
num_head (how many dense Layers)
num_base (how many VGG Layers)
'''
class Test_Model(tf.keras.models.Model):
    def __init__(
            self,
            num_unfrozen_head_layers,
            num_unfrozen_base_layers,
            num_classes,
            conv_base=tf.keras.applications.VGG16(include_top=False, weights='imagenet', input_shape=(70, 70, 3)),
    ):
        super(Test_Model, self).__init__(name="Test_Model")
        self.base = conv_base
        self.flatten = tf.keras.layers.Flatten()
        self.dense1 = tf.keras.layers.Dense(2048, activation='relu')
        self.dense2 = tf.keras.layers.Dense(1024, activation='relu')
        self.dense3 = tf.keras.layers.Dense(128, activation='relu')
        self.out = tf.keras.layers.Dense(num_classes, activation='softmax')
        self.out._name = 'out'

        self.train_loss_metric = tf.keras.metrics.Mean('Supervised Training Loss')
        self.train_acc_metric = tf.keras.metrics.CategoricalAccuracy('Supervised Training Accuracy')
        self.val_loss_metric = tf.keras.metrics.Mean('Supervised Validation Loss')
        self.val_acc_metric = tf.keras.metrics.CategoricalAccuracy('Supervised Validation Accuracy')
        self.loss_fn = tf.keras.losses.categorical_crossentropy
        self.learning_rate = 1e-4

        # self.build((None, 32,32,3))
        self.set_trainable_layers(num_unfrozen_head_layers, num_unfrozen_base_layers)

    @tf.function
    def call(self, inputs, training=False):
        x = self.base(inputs)
        x = self.flatten(x)
        x = self.dense1(x)
        x = self.dense2(x)
        x = self.dense3(x)
        x = self.out(x)
        return x

    @tf.function
    def train_step(self, input_data):
        x_batch, y_batch = input_data
        with tf.GradientTape() as tape:
            tape.watch(x_batch)
            y_pred = self(x_batch, training=True)
            loss = self.loss_fn(y_batch, y_pred)
        trainable_vars = self.trainable_weights
        gradients = tape.gradient(loss, trainable_vars)
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        self.train_loss_metric.update_state(loss)
        self.train_acc_metric.update_state(y_batch, y_pred)
        return {"Supervised Loss": self.train_loss_metric.result(),
                "Supervised Accuracy": self.train_acc_metric.result()}

    @tf.function
    def test_step(self, input_data):
        x_batch, y_batch = input_data
        y_pred = self(x_batch, training=False)
        loss = self.loss_fn(y_batch, y_pred)
        self.val_loss_metric.update_state(loss)
        self.val_acc_metric.update_state(y_batch, y_pred)
        return {"Val Supervised Loss": self.val_loss_metric.result(),
                "Val Supervised Accuracy": self.val_acc_metric.result()}

    @property
    def metrics(self):
        # We list our `Metric` objects here so that `reset_states()` can be
        # called automatically at the start of each epoch
        # or at the start of `evaluate()`.
        # If you don't implement this property, you have to call
        # `reset_states()` yourself at the time of your choosing.
        return [self.train_loss_metric,
                self.train_acc_metric,
                self.val_loss_metric,
                self.val_acc_metric]

    def set_trainable_layers(self, num_head, num_base):
        for layer in [lay for lay in self.layers if not isinstance(lay, tf.keras.models.Model)]:
            layer.trainable = False
            print(layer.name, layer.trainable)
        for block in self.layers:
            if isinstance(block, tf.keras.models.Model):
                print('Found Submodel', block.name)
                for layer in block.layers:
                    layer.trainable = False
                    print(layer.name, layer.trainable)
                if num_base > 0:
                    for layer in block.layers[-num_base:]:
                        layer.trainable = True
                        print(layer.name, layer.trainable)
        if num_head > 0:
            for layer in [lay for lay in self.layers if not isinstance(lay, tf.keras.models.Model)][-num_head:]:
                layer.trainable = True
                print(layer.name, layer.trainable)
'''
Showcase 1: first training the completely frozen Model, then unfreezing:
the unfrozen model doesn't learn
'''
model = Test_Model(num_unfrozen_head_layers=0, num_unfrozen_base_layers=0, num_classes=num_classes)  # Should NOT learn -> doesn't learn
model.build((None, 70, 70, 3))
model.summary()
model.compile(optimizer=tf.keras.optimizers.Adam(1e-5))
model.fit(ds_train, validation_data=ds_val)

model.set_trainable_layers(10, 20)  # SHOULD LEARN -> doesn't learn
model.summary()
model.compile(optimizer=tf.keras.optimizers.Adam(1e-5))
model.fit(ds_train, validation_data=ds_val)
# DOESN'T LEARN

'''
Showcase 2: when first training the Model with more trainable Layers than in the second step:
an AssertionError occurs
'''
model = Test_Model(num_unfrozen_head_layers=10, num_unfrozen_base_layers=2, num_classes=num_classes)  # SHOULD LEARN -> learns
model.build((None, 70, 70, 3))
model.summary()
model.compile(optimizer=tf.keras.optimizers.Adam(1e-5))
model.fit(ds_train, validation_data=ds_val)

model.set_trainable_layers(1, 1)  # SHOULD NOT LEARN -> AssertionError
model.summary()
model.compile(optimizer=tf.keras.optimizers.Adam(1e-5))
model.fit(ds_train, validation_data=ds_val)

'''
Showcase 3: same procedure as in Showcase 2, but the optimizer state is transferred to the recompiled Model:
can't set weights because the optimizer expects a list of length 0
'''
model = Test_Model(num_unfrozen_head_layers=10, num_unfrozen_base_layers=20, num_classes=num_classes)  # SHOULD LEARN -> learns
model.build((None, 70, 70, 3))
model.summary()
model.compile(optimizer=tf.keras.optimizers.Adam(1e-5))
model.fit(ds_train, validation_data=ds_val)

opti_state = model.optimizer.get_weights()
model.set_trainable_layers(0, 0)  # SHOULD NOT LEARN -> learns
model.summary()
model.compile(optimizer=tf.keras.optimizers.Adam(1e-5))
model.optimizer.set_weights(opti_state)
model.fit(ds_train, validation_data=ds_val)
#%%
'''
Constructing the same Architecture with the Functional API and running the Experiments
'''
import tensorflow as tf

conv_base = tf.keras.applications.VGG16(include_top=False, weights='imagenet', input_shape=(70, 70, 3))
inputs = tf.keras.layers.Input((70, 70, 3))
x = conv_base(inputs)
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dense(2048, activation='relu')(x)
x = tf.keras.layers.Dense(1024, activation='relu')(x)
x = tf.keras.layers.Dense(128, activation='relu')(x)
out = tf.keras.layers.Dense(num_classes, activation='softmax')(x)

isinstance(tf.keras.layers.Flatten(), tf.keras.models.Model)
isinstance(conv_base, tf.keras.models.Model)
def set_trainable_layers(mod, num_head, num_base):
    import time
    for layer in [lay for lay in mod.layers if not isinstance(lay, tf.keras.models.Model)]:
        layer.trainable = False
        print(layer.name, layer.trainable)
    for block in mod.layers:
        if isinstance(block, tf.keras.models.Model):
            print('Found Submodel')
            for layer in block.layers:
                layer.trainable = False
                print(layer.name, layer.trainable)
            if num_base > 0:
                for layer in block.layers[-num_base:]:
                    layer.trainable = True
                    print(layer.name, layer.trainable)
    if num_head > 0:
        for layer in [lay for lay in mod.layers if not isinstance(lay, tf.keras.models.Model)][-num_head:]:
            layer.trainable = True
            print(layer.name, layer.trainable)
'''
Showcase 1: first training the frozen Model, then unfreezing, recompiling and retraining:
model behaves as expected
'''
mod = tf.keras.models.Model(inputs,out, name = 'TestModel')
set_trainable_layers(mod, 0 ,0)
mod.summary()
mod.compile(optimizer = tf.keras.optimizers.Adam(1e-5), loss = 'categorical_crossentropy', metrics = ['accuracy'])
mod.fit(ds_train, validation_data = ds_val) # Model should NOT learn
set_trainable_layers(mod, 10,20)
mod.summary()
mod.compile(optimizer = tf.keras.optimizers.Adam(1e-5), loss = 'categorical_crossentropy', metrics = ['accuracy'])
mod.fit(ds_train, validation_data = ds_val) #Model SHOULD learn
'''
Showcase 2: first training the unfrozen Model, then reducing the number of trainable Layers:
model behaves as expected
'''
mod = tf.keras.models.Model(inputs,out, name = 'TestModel')
set_trainable_layers(mod, 10 ,20)
mod.summary()
mod.compile(optimizer = tf.keras.optimizers.Adam(1e-5), loss = 'categorical_crossentropy', metrics = ['accuracy'])
mod.fit(ds_train, validation_data = ds_val) # Model SHOULD learn
set_trainable_layers(mod, 0,0)
mod.summary()
mod.compile(optimizer = tf.keras.optimizers.Adam(1e-5), loss = 'categorical_crossentropy', metrics = ['accuracy'])
mod.fit(ds_train, validation_data = ds_val) #Model should NOT learn
'''
Showcase 3: first training the unfrozen Model, then reducing the number of trainable Layers, but also trying to transfer the Optimizer state:
behaves like the subclassed Model: the new Optimizer shouldn't have Weights
'''
mod = tf.keras.models.Model(inputs,out, name = 'TestModel')
set_trainable_layers(mod, 1 ,3)
mod.summary()
mod.compile(optimizer = tf.keras.optimizers.Adam(1e-5), loss = 'categorical_crossentropy', metrics = ['accuracy'])
mod.fit(ds_train, validation_data = ds_val) # Model SHOULD learn
opti_state = mod.optimizer.get_weights()
set_trainable_layers(mod, 4,8)
mod.summary()
mod.compile(optimizer = tf.keras.optimizers.Adam(1e-5), loss = 'categorical_crossentropy', metrics = ['accuracy'])
mod.optimizer.set_weights(opti_state)
mod.fit(ds_train, validation_data = ds_val) #Model should NOT learn
This is happening because of one of the fundamental differences between the Subclassing API and the Functional or Sequential APIs in TensorFlow 2.
While the Functional and Sequential APIs build a graph of layers (think of it as a separate data structure), the subclassed model builds a whole object and stores its logic as bytecode.
This means that with subclassing you lose access to the internal connectivity graph, and the normal behaviour that lets you freeze/unfreeze layers or reuse them in other models starts to get weird. Looking at your implementation, I would say the subclassed model is correct and it SHOULD work, if we were dealing with a library other than TensorFlow, that is.
Francois Chollet explains it better than I ever will in one of his Tweetorials.
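As a small illustration of that difference (my own sketch, not from the answer): a Functional model carries an inspectable graph of layers, while a subclassed model only records its layer objects, not how call() wires them together.

import tensorflow as tf

# Functional: the wiring is an inspectable data structure
inp = tf.keras.Input(shape=(4,))
out = tf.keras.layers.Dense(2)(inp)
functional = tf.keras.Model(inp, out)
print(functional.get_config().keys())  # includes 'layers': the full connectivity graph

# Subclassed: only the layer objects are tracked; the wiring lives in call()
class Sub(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.dense = tf.keras.layers.Dense(2)

    def call(self, x):
        return self.dense(x)

sub = Sub()
sub.build((None, 4))
print(sub.layers)  # the layers are visible, but there is no graph of how they connect
                   # (get_config() is not implemented for plain subclassed models)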
After some more experiments I have found a workaround for this problem:
While the model itself cannot be unfrozen/frozen after the first compilation and training, it is possible to save the model weights to a temporary file with model.save_weights('temp.h5'), reconstruct the model class (e.g. create a new instance of the model class), and load the previous weights with model.load_weights('temp.h5').
However, this can also lead to errors when the previous model has both unfrozen and frozen weights. To prevent them, you have to either set all layers trainable after training and before saving the weights, or copy the exact trainability structure of the model and reconstruct the new model so that its layers have the same trainability state as the previous one. This is possible with the following functions:
def get_trainability(model):
    # Takes a Keras model and returns a dictionary with the model's layer names
    # as keys and their trainability as values
    train_dict = {}
    for layer in model.layers:
        if isinstance(layer, tf.keras.models.Model):
            train_dict.update(get_trainability(layer))
        else:
            train_dict[layer.name] = layer.trainable
    return train_dict

def set_trainability(model, train_dict):
    # Takes a Keras model and a dictionary of layer names and booleans indicating
    # the desired trainability of each layer.
    # Modifies the model so that every layer whose name matches a dict key
    # gets trainable = boolean
    for layer in model.layers:
        if isinstance(layer, tf.keras.models.Model):
            set_trainability(layer, train_dict)
        else:
            for name in train_dict.keys():
                if name == layer.name:
                    layer.trainable = train_dict[name]
                    print(layer.name)
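A possible usage sketch of this workaround (the file name and hyperparameters here are arbitrary): record the trainability, save the weights, rebuild, then restore both the trainability structure and the weights.

train_dict = get_trainability(model)       # record each layer's trainability
model.save_weights('temp.h5')              # stash the trained weights

new_model = Test_Model(num_unfrozen_head_layers=0,
                       num_unfrozen_base_layers=0,
                       num_classes=num_classes)  # fresh instance
new_model.build((None, 70, 70, 3))
set_trainability(new_model, train_dict)    # replicate the old trainability structure
new_model.load_weights('temp.h5')          # restore the trained weights

new_model.compile(optimizer=tf.keras.optimizers.Adam(1e-5))
new_model.fit(ds_train, validation_data=ds_val)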
Hope this helps with similar problems in the future!

Mean of Tensorflow Keras's Glorot Normal Initializer is not zero

As per the documentation of Glorot Normal, the mean of the normal distribution of the initial weights should be zero:
Draws samples from a truncated normal distribution centered on 0
But it doesn't seem to be zero; am I missing something?
Please find the code below:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import numpy as np

print(tf.__version__)

initializer = tf.keras.initializers.GlorotNormal(seed=1234)
model = Sequential([Dense(units=3, input_shape=[1], kernel_initializer=initializer,
                          bias_initializer=initializer),
                    Dense(units=1, kernel_initializer=initializer,
                          bias_initializer=initializer)])

batch_size = 1
x = np.array([-1.0, 0, 1, 2, 3, 4.0], dtype='float32')
y = np.array([-3, -1.0, 1, 3.0, 5.0, 7.0], dtype='float32')
x = np.reshape(x, (-1, 1))

# Prepare the training dataset.
train_dataset = tf.data.Dataset.from_tensor_slices((x, y))
train_dataset = train_dataset.shuffle(buffer_size=64).batch(batch_size)

epochs = 1
learning_rate = 1e-3

# Instantiate an optimizer.
optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)

for epoch in range(epochs):
    # Iterate over the batches of the dataset.
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            logits = model(x_batch_train, training=True)  # Logits for this minibatch
            # Compute the loss value for this minibatch.
            loss_value = tf.keras.losses.MSE(y_batch_train, logits)

        Initial_Weights_1st_Hidden_Layer = model.trainable_weights[0]
        Mean_Weights_Hidden_Layer = tf.reduce_mean(Initial_Weights_1st_Hidden_Layer)
        Initial_Weights_Output_Layer = model.trainable_weights[2]
        Mean_Weights_Output_Layer = tf.reduce_mean(Initial_Weights_Output_Layer)
        Initial_Bias_1st_Hidden_Layer = model.trainable_weights[1]
        Mean_Bias_Hidden_Layer = tf.reduce_mean(Initial_Bias_1st_Hidden_Layer)
        Initial_Bias_Output_Layer = model.trainable_weights[3]
        Mean_Bias_Output_Layer = tf.reduce_mean(Initial_Bias_Output_Layer)

        if epoch == 0 and step == 0:
            print('\n Initial Weights of First-Hidden Layer = ', Initial_Weights_1st_Hidden_Layer)
            print('\n Mean of Weights of Hidden Layer = %s' % Mean_Weights_Hidden_Layer.numpy())
            print('\n Initial Weights of Second-Hidden/Output Layer = ', Initial_Weights_Output_Layer)
            print('\n Mean of Weights of Output Layer = %s' % Mean_Weights_Output_Layer.numpy())
            print('\n Initial Bias of First-Hidden Layer = ', Initial_Bias_1st_Hidden_Layer)
            print('\n Mean of Bias of Hidden Layer = %s' % Mean_Bias_Hidden_Layer.numpy())
            print('\n Initial Bias of Second-Hidden/Output Layer = ', Initial_Bias_Output_Layer)
            print('\n Mean of Bias of Output Layer = %s' % Mean_Bias_Output_Layer.numpy())
Because you don't draw enough samples from that distribution.
initializer = tf.keras.initializers.GlorotNormal(seed = 1234)
mean = tf.reduce_mean(initializer(shape=(1, 3))).numpy()
print(mean) # -0.29880756
But if you increase the number of samples:
initializer = tf.keras.initializers.GlorotNormal(seed = 1234)
mean = tf.reduce_mean(initializer(shape=(1, 500))).numpy()
print(mean) # 0.003004579
The same thing applies to your example. If you increase the first dense layer's units to 500, you should see 0.003004579 with the same seed.
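As a sketch of why this happens (the shapes here are arbitrary): for a 2-D shape (1, n), Glorot treats fan_in = 1 and fan_out = n, so it draws from a truncated normal with stddev sqrt(2 / (1 + n)), and the mean of n i.i.d. draws only approaches 0 at a rate of roughly stddev / sqrt(n):

import tensorflow as tf

initializer = tf.keras.initializers.GlorotNormal(seed=1234)
for n in [3, 50, 500, 50000]:
    samples = initializer(shape=(1, n))  # fan_in = 1, fan_out = n
    print(n, tf.reduce_mean(samples).numpy())  # the sample mean drifts toward 0 as n grows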

How does the reuse option in tf.variable_scope work?

I have the following problem: I am writing a simple script to learn how TensorFlow works, and I am defining the variables for convolution with the help of tf.variable_scope. However, every time I try to run this script I get a ValueError saying either to set reuse=None or reuse=True.
Can somebody explain why it doesn't just run the function without defining this option, or what a solution for this would be?
My code is:
import re
import tensorflow as tf
import numpy as np

data = np.load('/home/joanna/tensorflow-master/tensorflow/models/image/cifar10/konsensop/data.npy')
labels = np.load('/home/joanna/tensorflow-master/tensorflow/models/image/cifar10/konsensop/labels.npy')
labels = np.zeros((16400,))
labels[10001:16400] = 1
labels = labels.astype(int)
data = data.astype(np.float32)
#labels = tf.cast(labels,tf.int64)
MOVING_AVERAGE_DECAY = 0.9999     # The decay to use for the moving average.
NUM_EPOCHS_PER_DECAY = 350.0      # Epochs after which learning rate decays.
LEARNING_RATE_DECAY_FACTOR = 0.1  # Learning rate decay factor.
INITIAL_LEARNING_RATE = 0.1      # Initial learning rate.
NUM_CLASSES = 2
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 1000
batch_size = 300

def _variable_on_cpu(name, shape, initializer):
    dtype = tf.float32
    var = tf.get_variable(name, shape, initializer=initializer, dtype=dtype)
    return var

def _add_loss_summaries(loss):
    """Add summaries for losses in CIFAR-10 model.
    Generates moving average for all losses and associated summaries for
    visualizing the performance of the network.
    Args:
      total_loss: Total loss from loss().
    Returns:
      loss_averages_op: op for generating moving averages of losses.
    """
    # Compute the moving average of all individual losses and the total loss.
    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
    losses = tf.get_collection('losses')
    loss_averages_op = loss_averages.apply(losses + [loss])
    # Attach a scalar summary to all individual losses and the total loss; do the
    # same for the averaged version of the losses.
    for l in losses + [loss]:
        # Name each loss as '(raw)' and name the moving average version of the loss
        # as the original loss name.
        tf.scalar_summary(l.op.name + ' (raw)', l)
        tf.scalar_summary(l.op.name, loss_averages.average(l))
    return loss_averages_op

def _variable_with_weight_decay(name, shape, stddev, wd):
    dtype = tf.float32
    var = _variable_on_cpu(
        name,
        shape,
        tf.truncated_normal_initializer(stddev=stddev, dtype=dtype))
    if wd is not None:
        weight_decay = tf.mul(tf.nn.l2_loss(var), wd, name='weight_loss')
        tf.add_to_collection('losses', weight_decay)
    return var

def _activation_summary(x):
    tensor_name = re.sub('_[0-9]*/', '', x.op.name)
    tf.histogram_summary(tensor_name + '/activations', x)
    tf.scalar_summary(tensor_name + '/sparsity', tf.nn.zero_fraction(x))

def iterate_batches(data, labels, batch_size, num_epochs):
    N = int(labels.shape[0])
    batches_per_epoch = int(N / batch_size)
    for i in range(num_epochs):
        for j in range(batches_per_epoch):
            start, stop = j * batch_size, (j + 1) * batch_size
            yield data[start:stop, :, :, :], labels[start:stop]

def train():
    with tf.Graph().as_default():
        global_step = tf.Variable(0)
        x_tensor = tf.placeholder(tf.float32, shape=(batch_size, 3000, 1, 1))
        y_tensor = tf.placeholder(tf.int64, shape=(batch_size,))
        for x, y in iterate_batches(data, labels, 300, 1):
            print('yey!')
            with tf.variable_scope('conv1', reuse=True) as scope:
                kernel = _variable_with_weight_decay('weights',
                                                     shape=[100, 1, 1, 64],
                                                     stddev=5e-2,
                                                     wd=0.0)
                conv = tf.nn.conv2d(x_tensor, kernel, [1, 3, 1, 1], padding='SAME')
                biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
                bias = tf.nn.bias_add(conv, biases)
                conv1 = tf.nn.relu(bias, name=scope.name)
                _activation_summary(conv1)
            pool1 = tf.nn.max_pool(conv1, ksize=[1, 20, 1, 1], strides=[1, 2, 1, 1], padding='SAME', name='pool1')
            norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1')
            with tf.variable_scope('conv2', reuse=True) as scope:
                kernel = _variable_with_weight_decay('weights', [50, 1, 64, 64], stddev=5e-2, wd=0.0)
                conv = tf.nn.conv2d(norm1, kernel, [1, 3, 1, 1], padding='SAME')
                biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
                bias = tf.nn.bias_add(conv, biases)
                conv2 = tf.nn.relu(bias, name=scope.name)
                _activation_summary(conv2)
            norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2')
            pool2 = tf.nn.max_pool(norm2, ksize=[1, 10, 1, 1], strides=[1, 2, 1, 1], padding='SAME', name='pool2')
            with tf.variable_scope('conv3', reuse=True) as scope:
                kernel = _variable_with_weight_decay('weights', [30, 1, 64, 64], stddev=5e-2, wd=0.0)
                conv = tf.nn.conv2d(pool2, kernel, [1, 10, 1, 1], padding='SAME')
                biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
                bias = tf.nn.bias_add(conv, biases)
                conv3 = tf.nn.relu(bias, name=scope.name)
                _activation_summary(conv3)
            norm3 = tf.nn.lrn(conv3, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm3')
            pool3 = tf.nn.max_pool(norm3, ksize=[1, 9, 1, 1], strides=[1, 9, 1, 1], padding='SAME', name='pool3')
            with tf.variable_scope('fc4', reuse=True) as scope:
                # Move everything into depth so we can perform a single matrix multiply.
                reshape = tf.reshape(pool3, [batch_size, -1])
                dim = reshape.get_shape()[1].value
                weights = _variable_with_weight_decay('weights', shape=[dim, 64], stddev=0.04, wd=0.004)
                biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
                fc4 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)
                _activation_summary(fc4)
            with tf.variable_scope('fc5', reuse=True) as scope:
                weights = _variable_with_weight_decay('weights', shape=[64, 64],
                                                      stddev=0.04, wd=0.004)
                biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
                fc5 = tf.nn.relu(tf.matmul(fc4, weights) + biases, name=scope.name)
                _activation_summary(fc5)
            with tf.variable_scope('softmax_linear') as scope:
                weights = _variable_with_weight_decay('weights', [64, NUM_CLASSES],
                                                      stddev=1 / 64.0, wd=0.0)
                biases = _variable_on_cpu('biases', [NUM_CLASSES],
                                          tf.constant_initializer(0.0))
                softmax_linear = tf.add(tf.matmul(fc5, weights), biases, name=scope.name)
                _activation_summary(softmax_linear)
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(softmax_linear, y_tensor, name='cross_entropy_per_example')
            cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
            kupa = tf.add_to_collection('losses', cross_entropy_mean)
            loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
            #neu
            num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / batch_size
            decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
            lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE, global_step, decay_steps, LEARNING_RATE_DECAY_FACTOR, staircase=True)
            loss_averages_op = _add_loss_summaries(loss)
            summary_op = tf.merge_all_summaries()
            #neu
            init = tf.initialize_all_variables()
            sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
            sess.run(init)
            sess.run([conv, bias, conv1, pool1, norm1, conv2, norm2, pool2, conv3, norm3, pool3, fc4, fc5], feed_dict={x_tensor: x, y_tensor: y})
            sess.run([softmax_linear, loss], feed_dict={x_tensor: x, y_tensor: y})
            sess.run([lr, loss_averages_op, summary_op], feed_dict={x_tensor: x, y_tensor: y})
The problem is with this line here:
for x,y in iterate_batches(data,labels, 300,1):
This recreates the graph on every iteration, which is a bad thing to do, as it takes up more memory each time (this isn't always the case, but it can happen).
reuse=True comes into play in something like the example below, when defining the graph.
# First call creates one set of variables.
result1 = my_image_filter(image1)
# Another set of variables is created in the second call.
result2 = my_image_filter(image2)
TensorFlow doesn't know whether or not you want to "reuse" the variables, that is, whether they should share the same parameters or not.
In your specific case, by looping you are recreating the parameters each time while telling TensorFlow to simply reuse the variables.
It would be better if you could move the for loop to after the graph creation has already occurred; then you could get rid of the reuse=True everywhere.
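A minimal sketch of that restructuring, in the question's TF 1.x style (the name build_inference is mine, and only conv1 is shown): build the ops once, outside the loop, with reuse left at its default, and only feed batches inside the loop.

def build_inference(x_tensor):
    # Variables are created exactly once here, so no reuse flag is needed.
    with tf.variable_scope('conv1') as scope:
        kernel = _variable_with_weight_decay('weights', shape=[100, 1, 1, 64],
                                             stddev=5e-2, wd=0.0)
        conv = tf.nn.conv2d(x_tensor, kernel, [1, 3, 1, 1], padding='SAME')
        biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
        conv1 = tf.nn.relu(tf.nn.bias_add(conv, biases), name=scope.name)
    # ... remaining layers as in the question ...
    return conv1

x_tensor = tf.placeholder(tf.float32, shape=(batch_size, 3000, 1, 1))
output = build_inference(x_tensor)  # the graph is constructed once

sess = tf.Session()
sess.run(tf.initialize_all_variables())
for x, y in iterate_batches(data, labels, 300, 1):  # the loop only feeds batches
    sess.run(output, feed_dict={x_tensor: x})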