restoring error in tensorflow - tensorflow

I am a beginner with tensorflow and I was tinkering around with convnet for image recognition.However after I save my model I am getting an error while restoring it.
this is my tensor graph components ->
Y_train = to_categorical(y_train,num_classes=4)
Y_test = to_categorical(y_test,num_classes=4)
X = tf.placeholder(tf.float32, shape=(None, 64,64,3))
Y = tf.placeholder(tf.float32, shape=(None, 4))
w1 = tf.get_variable("w1", [4,4,3,8], initializer = tf.contrib.layers.xavier_initializer(seed = 0))
w2 = tf.get_variable("w2", [2,2,8,16], initializer = tf.contrib.layers.xavier_initializer(seed = 0))
Z1 = tf.nn.conv2d(X,w1, strides = [1,1,1,1], padding = 'SAME')
A1 = tf.nn.relu(Z1)
P1 = tf.nn.max_pool(A1, ksize = [1,8,8,1], strides = [1,8,8,1], padding = 'SAME')
Z2 = tf.nn.conv2d(P1,w2, strides = [1,1,1,1], padding = 'SAME')
A2 = tf.nn.relu(Z2)
P2 = tf.nn.max_pool(A2, ksize = [1,4,4,1], strides = [1,4,4,1], padding = 'SAME')
P2 = tf.contrib.layers.flatten(P2)
Z3 = tf.contrib.layers.fully_connected(P2,4,activation_fn=None)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = Z3, labels = Y))
optimizer = tf.train.AdamOptimizer(0.004).minimize(cost)
this is a basic convnet which i have successfully trained and tested.
However the problem I am facing is that after all the epochs have been completed for training, How do I save this model such that I can use it again in some file say predict.py where in I can just import it and make predictions
So i read some blogs on save and restore and did that but then I was getting the below mention error
Attempting to use uninitialized value fully_connected/biases [[Node:
fully_connected/biases/read = IdentityT=DT_FLOAT,
_device="/job:localhost/replica:0/task:0/device:CPU:0"]]
so given the convnet what should I do to use that model ? can someone provide working code for both the model file and predict file.

Related

Creating several weight tensors for each object in Multi-Object Tracking (MOT) using TensorFlow

I am using TensorFlow V1.10.0 and developing a Multi-Object Tracker based on MDNet. I need to assign a separate weight matrix for each detected object for the fully connected layers in order to get different embedding for each object during online training. I am using this tf.map_fn in order to generate a higher-order weight tensor (n_objects, flattened layer, hidden_units),
'''
def dense_fc4(n_objects):
initializer = lambda: tf.contrib.layers.xavier_initializer()(shape=(1024, 512))
return tf.Variable(initial_value=initializer, name='fc4/kernel',
shape=(n_objects.shape[0], 1024, 512))
W4 = tf.map_fn(dense_fc4, samples_flat)
b4 = tf.get_variable('fc4/bias', shape=512, initializer=tf.zeros_initializer())
fc4 = tf.add(tf.matmul(samples_flat, W4), b4)
fc4 = tf.nn.relu(fc4)
'''
However during execution when I run the session for W4 I get a weight matrix but all having the same values. Any help?
TIA
Here is a workaround, I was able to generate the multiple kernels outside the graph in a for loop and then giving it to the graph:
w6 = []
for n_obj in range(pos_data.shape[0]):
w6.append(tf.get_variable("fc6/kernel-" + str(n_obj), shape=(512, 2),
initializer=tf.contrib.layers.xavier_initializer()))
print("modeling fc6 branches...")
prob, train_op, accuracy, loss, pred, initialize_vars, y, fc6 = build_branches(fc5, w6)
def build_branches(fc5, w6):
y = tf.placeholder(tf.int64, [None, None])
b6 = tf.get_variable('fc6/bias', shape=2, initializer=tf.zeros_initializer())
fc6 = tf.add(tf.matmul(fc5, w6), b6)
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
logits=fc6))
train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="fc6")
with tf.variable_scope("", reuse=tf.AUTO_REUSE):
optimizer = tf.train.AdamOptimizer(learning_rate=0.001, name='adam')
train_op = optimizer.minimize(loss, var_list=train_vars)
initialize_vars = train_vars
initialize_vars += [optimizer.get_slot(var, name)
for name in optimizer.get_slot_names()
for var in train_vars]
if isinstance(optimizer, tf.train.AdamOptimizer):
initialize_vars += optimizer._get_beta_accumulators()
prob = tf.nn.softmax(fc6)
pred = tf.argmax(prob, 2)
correct_pred = tf.equal(pred, y)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
return prob, train_op, accuracy, loss, pred, initialize_vars, y, fc6

Training custom-made CNN model in eager execution programming environment

I have built CNN model by using the principle of "Model Sublclassing" in Keras. Here is the class which represents my model:
class ConvNet(tf.keras.Model):
def __init__(self, data_format, classes):
super(ConvNet, self).__init__()
if data_format == "channels_first":
axis = 1
elif data_format == "channels_last":
axis = -1
self.conv_layer1 = tf.keras.layers.Conv2D(filters = 32, kernel_size = 3,strides = (1,1),
padding = "same",activation = "relu")
self.pool_layer1 = tf.keras.layers.MaxPooling2D(pool_size = (2,2), strides = (2,2))
self.conv_layer2 = tf.keras.layers.Conv2D(filters = 64, kernel_size = 3,strides = (1,1),
padding = "same",activation = "relu")
self.pool_layer2 = tf.keras.layers.MaxPooling2D(pool_size = (2,2), strides = (2,2))
self.conv_layer3 = tf.keras.layers.Conv2D(filters = 128, kernel_size = 5,strides = (1,1),
padding = "same",activation = "relu")
self.pool_layer3 = tf.keras.layers.MaxPooling2D(pool_size = (2,2), strides = (1,1),
padding = "same")
self.flatten = tf.keras.layers.Flatten()
self.dense_layer1 = tf.keras.layers.Dense(units = 512, activation = "relu")
self.dense_layer2 = tf.keras.layers.Dense(units = classes, activation = "softmax")
def call(self, inputs, training = True):
output_tensor = self.conv_layer1(inputs)
output_tensor = self.pool_layer1(output_tensor)
output_tensor = self.conv_layer2(output_tensor)
output_tensor = self.pool_layer2(output_tensor)
output_tensor = self.conv_layer3(output_tensor)
output_tensor = self.pool_layer3(output_tensor)
output_tensor = self.flatten(output_tensor)
output_tensor = self.dense_layer1(output_tensor)
return self.dense_layer2(output_tensor)
I would like to know how to train it "eagerly", and by that I mean avoiding the use of compile and fit methods.
I am not sure how to exactly construct the training loop. I understand that I must perform tf.GradientTape.gradient() function in order to calculate the gradients and then use optimizers.apply_gradients() in order to update my model parameters.
What I do not understand is how can I make predictions with my model in order to get logits and then use them to calculate the loss. If someone could help me with the idea of how to construct the training loop I would really appreciate it.
Eager execution is the imperative programming mode to let developers follow Python's natural control flow. Essentially, you wouldn't need to first create placeholders, computational graphs and then execute them in TensorFlow sessions. You can use automatic differentiation to compute gradients in your training loop:
for i in range(iterations):
with tf.GradientTape() as tape:
logits = model(batch_examples, training = True)
loss = tf.losses.sparse_softmax_cross_entropy(batch_labels, logits)
grads = tape.gradient(loss, model.trainable_variables)
opt.apply_gradients([grads, model.trainable_variables])
This is assuming that the model is of the class Model from Keras. I hope this solves your problem! You should also check out the TensorFlow Guide on Eager Execution.

conv2d does not work

I am training a CNN in tensorflow but the program seems to have stuck at the first tf.nn.conv2d step. I have imported the cifar 10 dataset from keras and running my code in floydhub.
I haven't started the session yet, this is just the computation graph.
Link to the complete notebook
This is the part where it gets stuck:
# forward propagation
# convolution layer 1
c1 = tf.nn.conv2d(x_train, w1, strides = [1,1,1,1], padding = 'VALID')
# activation function for c1: relu
r1 = tf.nn.relu(c1)
# maxpooling
p1 = tf.nn.max_pool(r1, ksize = [1,2,2,1], strides = [1,2,2,1], padding = 'SAME')
# convolution layer 2
c2 = tf.nn.conv2d(p1, w2, strides = [1,1,1,1], padding='VALID')
# activation function for c2: relu
r2 = tf.nn.relu(c2)
# maxpooling
p2 = tf.nn.max_pool(r2, ksize = [1,2,2,1], strides = [1,2,2,1], padding = 'SAME')
# flattening the previous max pool layer
l1 = tf.contrib.layers.flatten(p2)
# fully connected layer
final = tf.contrib.layers.fully_connected(l1, 10, activation_fn = None)
EDIT:-
This is how I imported the dataset
# importing 50000 images of size 32x32x3
i = 0
# this list holds the images
img_base = []
for img in glob.glob("train\\*.png"):
img_base.append(cv2.imread(img))
# x_train is a nx32x32x3 matrix of n no. of images
x_train = np.array(img_base[0:40000]).astype(np.float32)
placeholders for training images and labels
# creating placeholders
x = tf.placeholder(tf.float32, [None, 32, 32, 3])
y = tf.placeholder(tf.float32, [None, 10])
weight initialisation`
tf.reset_default_graph()
# creating weights
w1 = tf.get_variable('w1', [4,4,3,10], initializer=tf.contrib.layers.xavier_initializer())
w2 = tf.get_variable('w2', [4,4,10,15], initializer=tf.contrib.layers.xavier_initializer())

Implementing convolutional layers using Tensorflow

I am trying to implement convolutional layers for text classification from this blog post with some modification to suit my needs.
In the blog, there is only one convolution layer while I'd like mine to have two convolutional layers followed by ReLU and max-pooling.
The code so far is:
vocab_size = 2000
embedding_size = 100
filter_height = 5
filter_width = embedding_size
no_of_channels = 1
no_of_filters = 256
sequence_length = 50
filter_size = 3
no_of_classes = 26
input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
input_y = tf.placeholder(tf.float32, [None, no_of_classes], name="input_y")
# Defining the embedding layer:
with tf.device('/cpu:0'), tf.name_scope("embedding"):
W = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0), name="W")
embedded_chars = tf.nn.embedding_lookup(W, input_x)
embedded_chars_expanded = tf.expand_dims(embedded_chars, -1)
# Convolution block:
with tf.name_scope("convolution-block"):
filter_shape = [filter_height, embedding_size, no_of_channels, no_of_filters]
W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
b = tf.Variable(tf.constant(0.1, shape=[no_of_filters]), name="b")
conv1 = tf.nn.conv2d(embedded_chars_expanded,
W,
strides = [1,1,1,1],
padding = "VALID",
name = "conv1")
conv2 = tf.nn.conv2d(conv1,
W,
strides = [1,1,1,1],
padding = "VALID",
name = "conv2")
Here, W is the filter matrix.
However, this gives the error:
ValueError: Dimensions must be equal, but are 256 and 1 for 'convolution-block_16/conv2' (op: 'Conv2D') with input shapes: [?,46,1,256], [5,100,1,256].
I realise I have erred in the dimensions of the layer, but I am unable to fix it or put in the correct dimensions.
If anybody could provide any guidance/help, it'd be really helpful.
Thank you.
Can't quite understand what you code to do, but change as follows will fix your problem.
with tf.name_scope("convolution-block"):
filter_shape = [filter_height, embedding_size, no_of_channels, no_of_channels #change the output channel as input#]
W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
b = tf.Variable(tf.constant(0.1, shape=[no_of_filters]), name="b")
conv1 = tf.nn.conv2d(embedded_chars_expanded,
W,
strides = [1,1,1,1],
padding = "SAME", ##Change the padding scheme
name = "conv1")
conv2 = tf.nn.conv2d(conv1,
W,
strides = [1,1,1,1],
padding = "VALID",
name = "conv2")

LSTM model error is percent of one output class

I'm having a rough time trying to figure out what's wrong with my LSTM model. I have 11 inputs, and 2 output classes (one-hot encoded) and very quickly, like within 1 batch or so, the error just goes to the % of one of the output classes and stays there.
I tried printing weights and biases, but they seem to all be full of NaN.
If i decrease the learning rate, or mess around with layers/units, I can get it to arrive at the % of one class error slowly, but it seems to always get to that point.
Here's the code:
num_units = 30
num_layers = 50
dropout_rate = 0.80
learning_rate=0.0001
batch_size = 180
epoch = 1
input_classes = len(train_input[0])
output_classes = len(train_output[0])
data = tf.placeholder(tf.float32, [None, input_classes, 1]) #Number of examples, number of input, dimension of each input
target = tf.placeholder(tf.float32, [None, output_classes]) #one-hot encoded: [1,0] = bad, [0,1] = good
dropout = tf.placeholder(tf.float32)
cell = tf.contrib.rnn.LSTMCell(num_units, state_is_tuple=True)
cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=dropout)
cell = tf.contrib.rnn.MultiRNNCell([cell] * num_layers, state_is_tuple=True)
#Input shape [batch_size, max_time, depth], output shape: [batch_size, max_time, cell.output_size]
val, _ = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)
val = tf.transpose(val, [1, 0, 2]) #reshapes it to [sequence_size, batch_size, depth]
#get last entry as it includes previous results
last = tf.gather(val, int(val.get_shape()[0]) - 1)
weight = tf.get_variable("W", shape=[num_units, output_classes], initializer=tf.contrib.layers.xavier_initializer())
bias = tf.get_variable("B", shape=[output_classes], initializer=tf.contrib.layers.xavier_initializer())
logits = tf.matmul(last, weight) + bias
prediction = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=target)
prediction = tf.clip_by_value(prediction, 1e-10,100.0)
cost = tf.reduce_mean(prediction)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
minimize = optimizer.minimize(cost)
mistakes = tf.not_equal(tf.argmax(target, 1), tf.argmax(logits, 1))
error = tf.reduce_mean(tf.cast(mistakes, tf.float32))
init_op = tf.global_variables_initializer()
saver = tf.train.Saver()
sess = tf.Session()
sess.run(init_op)
no_of_batches = int((len(train_input)) / batch_size)
for i in range(epoch):
ptr = 0
for j in range(no_of_batches):
inp, out = train_input[ptr:ptr+batch_size], train_output[ptr:ptr+batch_size]
ptr+=batch_size
sess.run(minimize,{data: inp, target: out, dropout: dropout_rate })
sess.close()
Since you have one hot encoding use sparse_softmax_cross_entropy_with_logits instead of tf.nn.softmax_cross_entropy_with_logits.
Refer to this stackoverflow answer to understand the difference of two functions.
1