Creating several weight tensors for each object in Multi-Object Tracking (MOT) using TensorFlow - tensorflow

I am using TensorFlow V1.10.0 and developing a Multi-Object Tracker based on MDNet. I need to assign a separate weight matrix for each detected object for the fully connected layers in order to get different embedding for each object during online training. I am using this tf.map_fn in order to generate a higher-order weight tensor (n_objects, flattened layer, hidden_units),
'''
def dense_fc4(n_objects):
initializer = lambda: tf.contrib.layers.xavier_initializer()(shape=(1024, 512))
return tf.Variable(initial_value=initializer, name='fc4/kernel',
shape=(n_objects.shape[0], 1024, 512))
W4 = tf.map_fn(dense_fc4, samples_flat)
b4 = tf.get_variable('fc4/bias', shape=512, initializer=tf.zeros_initializer())
fc4 = tf.add(tf.matmul(samples_flat, W4), b4)
fc4 = tf.nn.relu(fc4)
'''
However during execution when I run the session for W4 I get a weight matrix but all having the same values. Any help?
TIA

Here is a workaround, I was able to generate the multiple kernels outside the graph in a for loop and then giving it to the graph:
w6 = []
for n_obj in range(pos_data.shape[0]):
w6.append(tf.get_variable("fc6/kernel-" + str(n_obj), shape=(512, 2),
initializer=tf.contrib.layers.xavier_initializer()))
print("modeling fc6 branches...")
prob, train_op, accuracy, loss, pred, initialize_vars, y, fc6 = build_branches(fc5, w6)
def build_branches(fc5, w6):
y = tf.placeholder(tf.int64, [None, None])
b6 = tf.get_variable('fc6/bias', shape=2, initializer=tf.zeros_initializer())
fc6 = tf.add(tf.matmul(fc5, w6), b6)
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
logits=fc6))
train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="fc6")
with tf.variable_scope("", reuse=tf.AUTO_REUSE):
optimizer = tf.train.AdamOptimizer(learning_rate=0.001, name='adam')
train_op = optimizer.minimize(loss, var_list=train_vars)
initialize_vars = train_vars
initialize_vars += [optimizer.get_slot(var, name)
for name in optimizer.get_slot_names()
for var in train_vars]
if isinstance(optimizer, tf.train.AdamOptimizer):
initialize_vars += optimizer._get_beta_accumulators()
prob = tf.nn.softmax(fc6)
pred = tf.argmax(prob, 2)
correct_pred = tf.equal(pred, y)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
return prob, train_op, accuracy, loss, pred, initialize_vars, y, fc6

Related

Difference equation in LSTM network on Tensorflow

I'd like to use a LSTM network on Tensorflow to implement a difference equation. I searched on internet but I didn't find anything about this topic.
The equation is:
formula
in which b=[1, 2, 1] and a=[1, -1.6641, 0.8387].
My aim is to use a neural network to find the correlation between input and output. Due to that to find the output ad k-instant you have to know also the previous inputs and outputs, my idea is to implement a LSTM network (many to one structure).
If we suppose to have an input vector of 500 samples and to use a window size of 5, the input of LSTM network is a vector of shape (500,5,1) while the output is (500,1,1).
The IN%OUT of first iteration are:
[0; x(k-4), x(k-3), x(k-2), x(k-1), x(k); 1] -> [1; y(k); 1]
formula
in the second iteration:
[0; x(k-3), x(k-2), x(k-1), x(k), x(k+1); 1] -> [1; y(k+1); 1]
formula
So I used a LSMT network with stateful set to TRUE to allow the network to remember past states but it doesn't converge.
It seems to me that the idea is correct but I cannot see where I am going wrong. Could someone help me find the problem? I copy and paste the code below and the network is developed on Tensorflow.
# Difference equation
K = 0.0436
b = np.array([1,2,1])
a = np.array([1, -1.6641, 0.8387])
x = np.random.uniform(0, 1, 100)
y = K*(signal.lfilter(b,a,x))
# Generate Dataset
X_train = np.random.uniform(0, 1, 100)
y_train = K*(signal.lfilter(b,a,X_train))
X_val = np.ones(100)
y_val = K*(signal.lfilter(b,a,X_val))
X_test = np.random.uniform(0.5, 0.8, 100)
y_test = K*(signal.lfilter(b,a,X_test))
def get_x_split(data, windows_size):
""" Return sliding window dataset. """
x_temp = np.zeros([1,windows_size-1])
x = np.array([])
for i in range(0,len(data)):
x_temp = np.append(x_temp[-windows_size+1:], data[i]).T
x = np.append(x, x_temp, axis=0)
x = np.reshape(x, (int(len(x)/windows_size), windows_size))
return x
windows_size = 10
X_train = get_x_split(X_train, windows_size)
X_val = get_x_split(X_val, windows_size)
X_test = get_x_split(X_test, windows_size)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_val = np.reshape(X_val, (X_val.shape[0], X_val.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
# Model Definition
activation_function = 'tanh'
def build_model():
input_layer = Input(shape=(X_train.shape[1],1), batch_size=1)
HL_1 = LSTM(1, activation=activation_function, return_sequences=True, stateful = True)(input_layer)
HL_2 = LSTM(1, activation=activation_function, return_sequences=False, stateful = True)(HL_1)
output_layer = Dense(1, activation='relu',name='Output')(HL_2)
model = Model(inputs=input_layer, outputs=output_layer)
return model
model = build_model()
model.compile(optimizer=RMSprop(),
loss={'Output': 'mse'}, #mse
metrics={'Output': tf.keras.metrics.RootMeanSquaredError()})
# Training
history = model.fit(x=X_train,
y=y_train,
batch_size=1,
validation_data=(X_val, y_val),
epochs=5000,
verbose=1,
shuffle=False)
# Test
y_pred = model.predict(X_test)
pred_samples = 400
plt.figure(dpi=1200)
plt.plot(y_test[300:pred_samples,3,0], label='true', linewidth=0.8, alpha=0.5)
plt.plot(y_pred[300:pred_samples,3,0], label='pred')
plt.legend()
plt.grid()
plt.title("Test")
plt.show()

Fully Convolutional Network, Training Error

I apologize that I'm not good at English.
I'm trying to build my own Fully Convolutional Network using TensorFlow.
But I have difficulties on training this model with my own image data, whereas the MNIST data worked properly.
Here is my FCN model code: (Not using pre-trained or pre-bulit model)
import tensorflow as tf
import numpy as np
Loading MNIST Data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
images_flatten = tf.placeholder(tf.float32, shape=[None, 784])
images = tf.reshape(images_flatten, [-1,28,28,1]) # CNN deals with 3 dimensions
labels = tf.placeholder(tf.float32, shape=[None, 10])
keep_prob = tf.placeholder(tf.float32) # Dropout Ratio
Convolutional Layers
# Conv. Layer #1
W1 = tf.Variable(tf.truncated_normal([3, 3, 1, 4], stddev = 0.1))
b1 = tf.Variable(tf.truncated_normal([4], stddev = 0.1))
FMA = tf.nn.conv2d(images, W1, strides=[1,1,1,1], padding='SAME')
# FMA stands for Fused Multiply Add, which means convolution
RELU = tf.nn.relu(tf.add(FMA, b1))
POOL = tf.nn.max_pool(RELU, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
# Conv. Layer #2
W2 = tf.Variable(tf.truncated_normal([3, 3, 4, 8], stddev = 0.1))
b2 = tf.Variable(tf.truncated_normal([8], stddev = 0.1))
FMA = tf.nn.conv2d(POOL, W2, strides=[1,1,1,1], padding='SAME')
RELU = tf.nn.relu(tf.add(FMA, b2))
POOL = tf.nn.max_pool(RELU, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
# Conv. Layer #3
W3 = tf.Variable(tf.truncated_normal([7, 7, 8, 16], stddev = 0.1))
b3 = tf.Variable(tf.truncated_normal([16], stddev = 0.1))
FMA = tf.nn.conv2d(POOL, W3, strides=[1,1,1,1], padding='VALID')
RELU = tf.nn.relu(tf.add(FMA, b3))
# Dropout
Dropout = tf.nn.dropout(RELU, keep_prob)
# Conv. Layer #4
W4 = tf.Variable(tf.truncated_normal([1, 1, 16, 10], stddev = 0.1))
b4 = tf.Variable(tf.truncated_normal([10], stddev = 0.1))
FMA = tf.nn.conv2d(Dropout, W4, strides=[1,1,1,1], padding='SAME')
LAST_RELU = tf.nn.relu(tf.add(FMA, b4))
Summary: [Conv-ReLU-Pool] - [Conv-ReLU-Pool] - [Conv-ReLU] - [Dropout] - [Conv-ReLU]
Define Loss, Accuracy
prediction = tf.squeeze(LAST_RELU)
# Because FCN returns (1 x 1 x class_num) in training
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(prediction, labels))
# First arg is 'logits=' and the other one is 'labels='
optimizer = tf.train.AdamOptimizer(0.001)
train = optimizer.minimize(loss)
label_max = tf.argmax(labels, 1)
pred_max = tf.argmax(prediction, 1)
correct_pred = tf.equal(pred_max, label_max)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
Training Model
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for i in range(10000):
image_batch, label_batch = mnist.train.next_batch(100)
sess.run(train, feed_dict={images: image_batch, labels: label_batch, keep_prob: 0.8})
if i % 10 == 0:
tr = sess.run([loss, accuracy], feed_dict={images: image_batch, labels: label_batch, keep_prob: 1.0})
print("Step %d, Loss %g, Accuracy %g" % (i, tr[0], tr[1]))
Loss: 0.784 (Approximately)
Accuracy: 94.8% (Approximately)
The problem is that, training this model with MNIST data worked very well, but with my own data, loss is always same(0.6319), and the output layer is always 0.
There is no difference with the code, excepting for the third convolutional layer's filter size. This filter size and input size which is compressed by previous pooling layers, must have same width & height. That's why the filter size in this layer is [7,7].
What is wrong with my model?..
The only different code between two cases (MNIST, my own data) is:
Placeholder
My own data has (128 x 64 x 1) and the label is 'eyes', 'not_eyes'
images = tf.placeholder(tf.float32, [None, 128, 64, 1])
labels = tf.placeholder(tf.int32, [None, 2])
3rd Convolutional Layer
W3 = tf.Variable(tf.truncated_normal([32, 16, 8, 16], stddev = 0.1))
Feeding (Batch)
image_data, label_data = input_data.get_batch(TRAINING_FILE, 10)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
for i in range(10000):
image_batch, label_batch = sess.run([image_data, label_data])
sess.run(train, feed_dict={images: image_batch, labels: label_batch, keep_prob: 0.8})
if i % 10 == 0: ... # Validation part is almost same, too...
coord.request_stop()
coord.join(threads)
Here "input_data" is an another python file in the same directory, and "get_batch(TRAINING_FILE, 10)" is the function that returns batch data. The code is:
def get_input_queue(txtfile_name):
images = []
labels = []
for line in open(txtfile_name, 'r'): # Here txt file has data's path, label, label number
cols = re.split(',|\n', line)
labels.append(int(cols[2]))
images.append(tf.image.decode_jpeg(tf.read_file(cols[0]), channels = 1))
input_queue = tf.train.slice_input_producer([images, labels], shuffle = True)
return input_queue
def get_batch(txtfile_name, batch_size):
input_queue = get_input_queue(txtfile_name)
image = input_queue[0]
label = input_queue[1]
image = tf.reshape(image, [128, 64, 1])
batch_image, batch_label = tf.train.batch([image, label], batch_size)
batch_label_one_hot = tf.one_hot(tf.to_int64(batch_label), 2, on_value=1.0, off_value=0.0)
return batch_image, batch_label_one_hot
It seems not to have any problem .... :( Please Help me..!!
Are your inputs scaled appropriately?. The jpegs are in [0-255] range and it needs to be scaled to [-1 - 1]. You can try:
image = tf.reshape(image, [128, 64, 1])
image = tf.scalar_mul((1.0/255), image)
image = tf.subtract(image, 0.5)
image = tf.multiply(image, 2.0)
What is the accuracy you are getting with your model for MNIST? It would be helpful if you post the code. Are you using the trained model to evaluate the output for your own data.
A general suggestion on setting up the convolution model is provided here.
Here is the model suggestion according to the article :-
INPUT -> [[CONV -> RELU]*N -> POOL?]*M -> [FC -> RELU]*K -> FC
Having more than one layers of CONV->RELU pair before pooling improves learning complex features. Try with N=2 instead of 1.
Some other suggestions:
While you are preparing your data reduce it to smaller size than 128x64. Try same size as the MNIST data ..
image = tf.reshape(image, [28, 28, 1])
If your eye/noeye image is color, then convert it to greyscale and normalize the values to unity range. You can do this using numpy or tf, here is how using numpy
grayscale-->
img = np.dot(np.array(img, dtype='float32'), [[0.2989],[0.5870],[0.1140]])
normalize-->
mean = np.mean(img, dtype='float32')
std = np.std(img, dtype='float32', ddof=1)
if std < 1e-4: std = 1.
img = (img - mean) / std

Siamese Model with LSTM network fails to train using tensorflow

Dataset Description
The dataset contains a set of question pairs and a label which tells if the questions are same. e.g.
"How do I read and find my YouTube comments?" , "How can I see all my
Youtube comments?" , "1"
The goal of the model is to identify if the given question pair is same or different.
Approach
I have created a Siamese network to identify if two questions are same. Following is the model:
graph = tf.Graph()
with graph.as_default():
embedding_placeholder = tf.placeholder(tf.float32, shape=embedding_matrix.shape, name='embedding_placeholder')
with tf.variable_scope('siamese_network') as scope:
labels = tf.placeholder(tf.int32, [batch_size, None], name='labels')
keep_prob = tf.placeholder(tf.float32, name='question1_keep_prob')
with tf.name_scope('question1') as question1_scope:
question1_inputs = tf.placeholder(tf.int32, [batch_size, seq_len], name='question1_inputs')
question1_embedding = tf.get_variable(name='embedding', initializer=embedding_placeholder, trainable=False)
question1_embed = tf.nn.embedding_lookup(question1_embedding, question1_inputs)
question1_lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
question1_drop = tf.contrib.rnn.DropoutWrapper(question1_lstm, output_keep_prob=keep_prob)
question1_multi_lstm = tf.contrib.rnn.MultiRNNCell([question1_drop] * lstm_layers)
q1_initial_state = question1_multi_lstm.zero_state(batch_size, tf.float32)
question1_outputs, question1_final_state = tf.nn.dynamic_rnn(question1_multi_lstm, question1_embed, initial_state=q1_initial_state)
scope.reuse_variables()
with tf.name_scope('question2') as question2_scope:
question2_inputs = tf.placeholder(tf.int32, [batch_size, seq_len], name='question2_inputs')
question2_embedding = question1_embedding
question2_embed = tf.nn.embedding_lookup(question2_embedding, question2_inputs)
question2_lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
question2_drop = tf.contrib.rnn.DropoutWrapper(question2_lstm, output_keep_prob=keep_prob)
question2_multi_lstm = tf.contrib.rnn.MultiRNNCell([question2_drop] * lstm_layers)
q2_initial_state = question2_multi_lstm.zero_state(batch_size, tf.float32)
question2_outputs, question2_final_state = tf.nn.dynamic_rnn(question2_multi_lstm, question2_embed, initial_state=q2_initial_state)
Calculate the cosine distance using the RNN outputs:
with graph.as_default():
diff = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(question1_outputs[:, -1, :], question2_outputs[:, -1, :])), reduction_indices=1))
margin = tf.constant(1.)
labels = tf.to_float(labels)
match_loss = tf.expand_dims(tf.square(diff, 'match_term'), 0)
mismatch_loss = tf.expand_dims(tf.maximum(0., tf.subtract(margin, tf.square(diff)), 'mismatch_term'), 0)
loss = tf.add(tf.matmul(labels, match_loss), tf.matmul((1 - labels), mismatch_loss), 'loss_add')
distance = tf.reduce_mean(loss)
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(distance)
Following is the code to train the model:
with graph.as_default():
saver = tf.train.Saver()
with tf.Session(graph=graph) as sess:
sess.run(tf.global_variables_initializer(), feed_dict={embedding_placeholder: embedding_matrix})
iteration = 1
for e in range(epochs):
summary_writer = tf.summary.FileWriter('/Users/mithun/projects/kaggle/quora_question_pairs/logs', sess.graph)
summary_writer.add_graph(sess.graph)
for ii, (x1, x2, y) in enumerate(get_batches(question1_train, question2_train, label_train, batch_size), 1):
feed = {question1_inputs: x1,
question2_inputs: x2,
labels: y[:, None],
keep_prob: 0.9
}
loss1 = sess.run([distance], feed_dict=feed)
if iteration%5==0:
print("Epoch: {}/{}".format(e, epochs),
"Iteration: {}".format(iteration),
"Train loss: {:.3f}".format(loss1))
if iteration%50==0:
val_acc = []
for x1, x2, y in get_batches(question1_val, question2_val, label_val, batch_size):
feed = {question1_inputs: x1,
question2_inputs: x2,
labels: y[:, None],
keep_prob: 1
}
batch_acc = sess.run([accuracy], feed_dict=feed)
val_acc.append(batch_acc)
print("Val acc: {:.3f}".format(np.mean(val_acc)))
iteration +=1
saver.save(sess, "checkpoints/quora_pairs.ckpt")
I have trained the above model with about 10,000 labeled data. But, the accuracy is stagnant at around 0.630 and strangely the validation accuracy is same across all the iterations.
lstm_size = 64
lstm_layers = 1
batch_size = 128
learning_rate = 0.001
Is there anything wrong with the way I have created the model?
This is a common problem with imbalanced datasets like the recently released Quora dataset which you are using. Since the Quora dataset is imbalanced (~63% negative and ~37% positive examples) you need proper initialization of weights. Without weight initialization your solution will be stuck in a local minima and it will train to predict only the negative class. Hence the 63% accuracy, because that is the percentage of 'not similar' questions in your validation data. If you check the results obtained on your validation set you will notice that it predicts all zeros. A truncated normal distribution proposed in He et al., http://arxiv.org/abs/1502.01852 is a good alternate for initializing the weights.

LSTM model error is percent of one output class

I'm having a rough time trying to figure out what's wrong with my LSTM model. I have 11 inputs, and 2 output classes (one-hot encoded) and very quickly, like within 1 batch or so, the error just goes to the % of one of the output classes and stays there.
I tried printing weights and biases, but they seem to all be full of NaN.
If i decrease the learning rate, or mess around with layers/units, I can get it to arrive at the % of one class error slowly, but it seems to always get to that point.
Here's the code:
num_units = 30
num_layers = 50
dropout_rate = 0.80
learning_rate=0.0001
batch_size = 180
epoch = 1
input_classes = len(train_input[0])
output_classes = len(train_output[0])
data = tf.placeholder(tf.float32, [None, input_classes, 1]) #Number of examples, number of input, dimension of each input
target = tf.placeholder(tf.float32, [None, output_classes]) #one-hot encoded: [1,0] = bad, [0,1] = good
dropout = tf.placeholder(tf.float32)
cell = tf.contrib.rnn.LSTMCell(num_units, state_is_tuple=True)
cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=dropout)
cell = tf.contrib.rnn.MultiRNNCell([cell] * num_layers, state_is_tuple=True)
#Input shape [batch_size, max_time, depth], output shape: [batch_size, max_time, cell.output_size]
val, _ = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)
val = tf.transpose(val, [1, 0, 2]) #reshapes it to [sequence_size, batch_size, depth]
#get last entry as it includes previous results
last = tf.gather(val, int(val.get_shape()[0]) - 1)
weight = tf.get_variable("W", shape=[num_units, output_classes], initializer=tf.contrib.layers.xavier_initializer())
bias = tf.get_variable("B", shape=[output_classes], initializer=tf.contrib.layers.xavier_initializer())
logits = tf.matmul(last, weight) + bias
prediction = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=target)
prediction = tf.clip_by_value(prediction, 1e-10,100.0)
cost = tf.reduce_mean(prediction)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
minimize = optimizer.minimize(cost)
mistakes = tf.not_equal(tf.argmax(target, 1), tf.argmax(logits, 1))
error = tf.reduce_mean(tf.cast(mistakes, tf.float32))
init_op = tf.global_variables_initializer()
saver = tf.train.Saver()
sess = tf.Session()
sess.run(init_op)
no_of_batches = int((len(train_input)) / batch_size)
for i in range(epoch):
ptr = 0
for j in range(no_of_batches):
inp, out = train_input[ptr:ptr+batch_size], train_output[ptr:ptr+batch_size]
ptr+=batch_size
sess.run(minimize,{data: inp, target: out, dropout: dropout_rate })
sess.close()
Since you have one hot encoding use sparse_softmax_cross_entropy_with_logits instead of tf.nn.softmax_cross_entropy_with_logits.
Refer to this stackoverflow answer to understand the difference of two functions.
1

How does the reuse option in tf.variable_scope work?

I have a following problem, I am writing a simple code to learn how tensorflow works and I am defining the variables for convolution with help of tf.variable_scope. However everytime I try to run this script I get a ValueError saying either to set reuse=None or reuse=True.
Can somebody explain why doesn't it just run the function without defining this option or what would be a solution for that?
My code is:
import re
import tensorflow as tf
import numpy as np
data = np.load('/home/joanna/tensorflow-master/tensorflow/models/image/cifar10/konsensop/data.npy')
labels = np.load('/home/joanna/tensorflow-master/tensorflow/models/image/cifar10/konsensop/labels.npy')
labels = np.zeros((16400,))
labels[10001:16400]=1
labels = labels.astype(int)
data = data.astype(np.float32)
#labels = tf.cast(labels,tf.int64)
MOVING_AVERAGE_DECAY = 0.9999 # The decay to use for the moving average.
NUM_EPOCHS_PER_DECAY = 350.0 # Epochs after which learning rate decays.
LEARNING_RATE_DECAY_FACTOR = 0.1 # Learning rate decay factor.
INITIAL_LEARNING_RATE = 0.1 # Initial learning rate.
NUM_CLASSES=2
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN= 1000
batch_size=300
def _variable_on_cpu(name, shape, initializer):
dtype = tf.float32
var = tf.get_variable(name, shape, initializer = initializer, dtype = dtype)
return var
def _add_loss_summaries(loss):
"""Add summaries for losses in CIFAR-10 model.
Generates moving average for all losses and associated summaries for
visualizing the performance of the network.
Args:
total_loss: Total loss from loss().
Returns:
loss_averages_op: op for generating moving averages of losses.
"""
# Compute the moving average of all individual losses and the total loss.
loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
losses = tf.get_collection('losses')
loss_averages_op = loss_averages.apply(losses + [loss])
# Attach a scalar summary to all individual losses and the total loss; do the
# same for the averaged version of the losses.
for l in losses + [loss]:
# Name each loss as '(raw)' and name the moving average version of the loss
# as the original loss name.
tf.scalar_summary(l.op.name +' (raw)', l)
tf.scalar_summary(l.op.name, loss_averages.average(l))
return loss_averages_op
def _variable_with_weight_decay(name, shape, stddev, wd):
dtype = tf.float32
var = _variable_on_cpu(
name,
shape,
tf.truncated_normal_initializer(stddev=stddev, dtype=dtype))
if wd is not None:
weight_decay = tf.mul(tf.nn.l2_loss(var), wd, name='weight_loss')
tf.add_to_collection('losses', weight_decay)
return var
def _activation_summary(x):
tensor_name = re.sub('_[0-9]*/','', x.op.name)
tf.histogram_summary(tensor_name + '/activations', x)
tf.scalar_summary(tensor_name + '/sparsity', tf.nn.zero_fraction(x))
def iterate_batches(data, labels, batch_size, num_epochs):
N = int(labels.shape[0])
batches_per_epoch = int(N/batch_size)
for i in range(num_epochs):
for j in range(batches_per_epoch):
start, stop = j*batch_size, (j+1)*batch_size
yield data[start:stop,:,:,:], labels[start:stop]
def train():
with tf.Graph().as_default():
global_step = tf.Variable(0)
x_tensor = tf.placeholder(tf.float32, shape=(batch_size, 3000,1,1))
y_tensor = tf.placeholder(tf.int64, shape=(batch_size,))
for x,y in iterate_batches(data,labels, 300,1):
print('yey!')
with tf.variable_scope('conv1',reuse=True) as scope:
kernel = _variable_with_weight_decay('weights',
shape=[100,1,1,64],
stddev=5e-2,
wd=0.0)
conv = tf.nn.conv2d(x_tensor, kernel, [1,3,1,1], padding = 'SAME')
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
bias = tf.nn.bias_add(conv, biases)
conv1 = tf.nn.relu(bias, name=scope.name)
_activation_summary(conv1)
pool1 = tf.nn.max_pool(conv1, ksize=[1,20,1,1], strides=[1,2,1,1], padding='SAME', name='pool1')
norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm1')
with tf.variable_scope('conv2',reuse=True) as scope:
kernel = _variable_with_weight_decay('weights', [50,1,64,64], stddev = 5e-2, wd=0.0)
conv = tf.nn.conv2d(norm1, kernel, [1,3,1,1], padding='SAME')
biases = _variable_on_cpu('biases',[64], tf.constant_initializer(0.1))
bias = tf.nn.bias_add(conv,biases)
conv2 = tf.nn.relu(bias, name=scope.name)
_activation_summary(conv2)
norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001/9.0, beta = 0.75, name='norm2')
pool2 = tf.nn.max_pool(norm2, ksize=[1,10,1,1], strides=[1,2,1,1], padding='SAME', name='pool2')
with tf.variable_scope('conv3',reuse=True) as scope:
kernel = _variable_with_weight_decay('weights', [30,1,64,64], stddev = 5e-2, wd=0.0)
conv = tf.nn.conv2d(pool2, kernel, [1,10,1,1], padding='SAME')
biases = _variable_on_cpu('biases',[64], tf.constant_initializer(0.1))
bias = tf.nn.bias_add(conv,biases)
conv3 = tf.nn.relu(bias, name=scope.name)
_activation_summary(conv3)
norm3 = tf.nn.lrn(conv3, 4, bias=1.0, alpha=0.001/9.0, beta = 0.75, name='norm3')
pool3 = tf.nn.max_pool(norm3, ksize=[1,9,1,1], strides=[1,9,1,1], padding='SAME', name='pool3')
with tf.variable_scope('fc4',reuse=True) as scope:
# Move everything into depth so we can perform a single matrix multiply.
reshape = tf.reshape(pool3, [batch_size, -1])
dim = reshape.get_shape()[1].value
weights = _variable_with_weight_decay('weights', shape=[dim, 64], stddev=0.04, wd=0.004)
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
fc4 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)
_activation_summary(fc4)
with tf.variable_scope('fc5',reuse=True) as scope:
weights = _variable_with_weight_decay('weights', shape=[64, 64],
stddev=0.04, wd=0.004)
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
fc5 = tf.nn.relu(tf.matmul(fc4, weights) + biases, name=scope.name)
_activation_summary(fc5)
with tf.variable_scope('softmax_linear',) as scope:
weights = _variable_with_weight_decay('weights', [64, NUM_CLASSES],
stddev=1/64.0, wd=0.0)
biases = _variable_on_cpu('biases', [NUM_CLASSES],
tf.constant_initializer(0.0))
softmax_linear = tf.add(tf.matmul(fc5, weights), biases, name=scope.name)
_activation_summary(softmax_linear)
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(softmax_linear, y_tensor, name='cross_entropy_per_example')
cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
kupa = tf.add_to_collection('losses', cross_entropy_mean)
loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
#neu
num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /batch_size
decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE, global_step, decay_steps, LEARNING_RATE_DECAY_FACTOR, staircase=True)
loss_averages_op = _add_loss_summaries(loss)
summary_op = tf.merge_all_summaries()
#neu
init = tf.initialize_all_variables()
sess = tf.Session(config = tf.ConfigProto(log_device_placement=False))
sess.run(init)
sess.run([conv, bias, conv1, pool1, norm1, conv2,norm2, pool2, conv3, norm3, pool3,fc4,fc5], feed_dict={x_tensor:x, y_tensor:y})
sess.run([softmax_linear,loss], feed_dict={x_tensor:x, y_tensor:y})
sess.run([lr, loss_averages_op, summary_op], feed_dict={x_tensor:x, y_tensor:y})
The problem is with this line here:
for x,y in iterate_batches(data,labels, 300,1):
This will recreate the graph however many times which is a bad thing to do as it'll take up more memory each time (this isn't always the case but it can happen).
The reuse=True comes in something like this example below when defining the graph.
# First call creates one set of variables.
result1 = my_image_filter(image1)
# Another set of variables is created in the second call.
result2 = my_image_filter(image2)
Tensorflow doesn't know whether or not you want to "reuse" the variables as in should they share the same parameters or not.
In your specific case by looping your recreating the parameters each time and telling tensorflow to simply reuse the variables.
It would be better if you could move the for loop to after the graph creation has already occurred and then you could get rid of the reuse=True everywhere.