Understanding Torch code and implementing it in TensorFlow - tensorflow

I am implementing a TensorFlow version of LBCNN, whose code is here: https://github.com/juefeix/lbcnn.torch. The problem is that when I rewrite it in TensorFlow, the cost stays high and keeps fluctuating. I have been stuck on this for two weeks; even after debugging everything, I still don't know where I went wrong.
Torch code:
-- resnet.lua
local function createModel(opt)
   local function basicblock(nChIn, nChOut, sz)
      local s = nn.Sequential()
      local shareConv = Convolution(nChIn, nChOut, sz, sz, 1,
         1, (sz-1)/2, (sz-1)/2)
      s:add(SBatchNorm(nChIn))
      s:add(shareConv)
      s:add(ReLU())
      s:add(Convolution(nChOut, nChIn, 1, 1))
      local identity = nn.Identity()
      local output = nn.Sequential():add(nn.ConcatTable()
         :add(s):add(identity)):add(nn.CAddTable(true))
      return output
   end

   local sz = opt.convSize
   local nInputPlane = opt.nInputPlane
   local nChIn = opt.numChannels
   local nChOut = opt.numWeights

   -- define model to train
   model = nn.Sequential()
   model:add(Convolution(nInputPlane, nChIn, sz, sz, 1, 1, 1, 1))
   model:add(SBatchNorm(nChIn))
   model:add(ReLU(true))
   for stages = 1, opt.depth do
      model:add(basicblock(nChIn, nChOut, sz))
   end
   model:add(Avg(5, 5, 5, 5))

   -- stage 3 : standard 2-layer neural network
   model:add(nn.Reshape(nChIn*opt.view))
   model:add(nn.Dropout(0.5))
   model:add(nn.Linear(nChIn*opt.view,
      math.max(opt.nClasses, opt.full)))
   model:add(cudnn.ReLU())
   model:add(nn.Dropout(0.5))
   model:add(nn.Linear(math.max(opt.full, opt.nClasses), opt.nClasses))
   model:cuda()
   return model
end
return createModel
Tensorflow code:
def cnn(prev_input, lbc_size, lbc_channels, output_channels):
    shortcut = tf.identity(prev_input)
    B = tf.contrib.layers.batch_norm(prev_input)
    Z = tf.contrib.layers.conv2d(inputs=B, num_outputs=lbc_channels,
                                 kernel_size=3, stride=1, padding="SAME",
                                 activation_fn=None)
    A1 = tf.nn.relu(Z)
    A2 = tf.contrib.layers.conv2d(inputs=A1, num_outputs=output_channels,
                                  kernel_size=1, stride=1,
                                  padding="SAME", activation_fn=None)
    A3 = tf.add(A2, shortcut)
    return A3
def model(X, Keep_probability):
    with tf.name_scope("Pre-Conv"):
        X1 = tf.contrib.layers.conv2d(inputs=X, num_outputs=output_channels,
                                      kernel_size=lbc_size, stride=1,
                                      padding="SAME", activation_fn=None)
        X2 = tf.contrib.layers.batch_norm(X1)
        X3 = tf.nn.relu(X2)
    X_in = X3
    for i in range(conv_layers):
        with tf.name_scope("conv"):
            X_out = cnn(X_in, lbc_size, lbc_channels, out_channels)
            X_in = X_out
    with tf.name_scope("AvgPool"):
        Z = tf.nn.avg_pool(value=X_in, ksize=[1, 5, 5, 1],
                           strides=[1, 5, 5, 1], padding="VALID")
    with tf.name_scope("Flatten"):
        P = tf.contrib.layers.flatten(Z)
    with tf.name_scope("Dropout"):
        F1 = tf.nn.dropout(x=P, keep_prob=0.5)
    with tf.name_scope("Fully"):
        F2 = tf.contrib.layers.fully_connected(inputs=F1,
                                               num_outputs=fc_hidden_units,
                                               activation_fn=tf.nn.relu)
    with tf.name_scope("Dropout"):
        F3 = tf.nn.dropout(x=F2, keep_prob=0.5)
    with tf.name_scope("Fully"):
        F4 = tf.contrib.layers.fully_connected(inputs=F3,
                                               num_outputs=output_classes,
                                               activation_fn=None)
    return F4
Assume all the parameters are passed correctly. I just want to ask whether the two architectures are the same or not. One more thing: their code uses SGD with momentum and weight decay, while I use AdamOptimizer; could this make the difference? Thank you so much.

I did not look over your entire code to check that it is indeed the same, but usually there is a difference in the number of examples needed to converge when you change the optimizer from momentum SGD to Adam. You will also need to retune the hyperparameters to get good performance.
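If you want to rule out the optimizer, a closer match to the Torch setup is SGD with momentum plus an explicit L2 weight-decay term (TF 1.x's MomentumOptimizer has no weight_decay argument). A minimal sketch, assuming your loss tensor is named cost and the hyperparameter names below are placeholders:

# Sketch only: mimic Torch's SGD + momentum + weight decay in TF 1.x.
# `cost`, `learning_rate`, `momentum` and `weight_decay` are assumed names.
l2 = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()])
total_loss = cost + weight_decay * l2

optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                       momentum=momentum)
train_op = optimizer.minimize(total_loss)

Even then, a learning rate tuned for Adam is rarely right for momentum SGD, so expect to re-search it.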

Related

Triplet-Loss using pre-trained network

I am trying to use the Triplet-Loss technique to fine-tune an EfficientNet network for human Re-ID using Keras. Here is the code I am using:
This is the generator:
class SampleGen(object):
    def __init__(self, file_class_mapping):
        self.file_class_mapping = file_class_mapping
        self.class_to_list_files = defaultdict(list)
        self.list_all_files = list(file_class_mapping.keys())
        self.range_all_files = list(range(len(self.list_all_files)))

        for file, class_ in file_class_mapping.items():
            self.class_to_list_files[class_].append(file)

        self.list_classes = list(set(self.file_class_mapping.values()))
        self.range_list_classes = range(len(self.list_classes))
        self.class_weight = np.array([len(self.class_to_list_files[class_])
                                      for class_ in self.list_classes])
        self.class_weight = self.class_weight / np.sum(self.class_weight)

    def get_sample(self):
        class_idx = np.random.choice(self.range_list_classes, 1, p=self.class_weight)[0]
        examples_class_idx = np.random.choice(
            range(len(self.class_to_list_files[self.list_classes[class_idx]])), 2)
        positive_example_1, positive_example_2 = \
            self.class_to_list_files[self.list_classes[class_idx]][examples_class_idx[0]], \
            self.class_to_list_files[self.list_classes[class_idx]][examples_class_idx[1]]

        negative_example = None
        while negative_example is None or self.file_class_mapping[negative_example] == \
                self.file_class_mapping[positive_example_1]:
            negative_example_idx = np.random.choice(self.range_all_files, 1)[0]
            negative_example = self.list_all_files[negative_example_idx]
        return positive_example_1, negative_example, positive_example_2
def read_and_resize(filepath):
    im = Image.open(filepath).convert('RGB')
    im = im.resize((image_size, image_size))
    return np.array(im, dtype="float32")

def augment(im_array):
    if np.random.uniform(0, 1) > 0.9:
        im_array = np.fliplr(im_array)
    return im_array
def gen(triplet_gen):
    while True:
        list_positive_examples_1 = []
        list_negative_examples = []
        list_positive_examples_2 = []

        for i in range(batch_size):
            positive_example_1, negative_example, positive_example_2 = triplet_gen.get_sample()
            path_pos1 = join(path_train, positive_example_1)
            path_neg = join(path_train, negative_example)
            path_pos2 = join(path_train, positive_example_2)

            positive_example_1_img = read_and_resize(path_pos1)
            negative_example_img = read_and_resize(path_neg)
            positive_example_2_img = read_and_resize(path_pos2)

            positive_example_1_img = augment(positive_example_1_img)
            negative_example_img = augment(negative_example_img)
            positive_example_2_img = augment(positive_example_2_img)

            list_positive_examples_1.append(positive_example_1_img)
            list_negative_examples.append(negative_example_img)
            list_positive_examples_2.append(positive_example_2_img)

        A = preprocess_input(np.array(list_positive_examples_1))
        B = preprocess_input(np.array(list_positive_examples_2))
        C = preprocess_input(np.array(list_negative_examples))
        label = None
        yield {'anchor_input': A, 'positive_input': B, 'negative_input': C}, label
This is how I create the model:
def get_model():
    base_model = efn.EfficientNetB3(weights='imagenet', include_top=False)
    for layer in base_model.layers:
        layer.trainable = False

    x = base_model.output
    x = Dropout(0.6)(x)
    x = Dense(embedding_dim)(x)
    x = Lambda(lambda x: K.l2_normalize(x, axis=1), name="enc_out")(x)
    embedding_model = Model(base_model.input, x, name="embedding")

    input_shape = (image_size, image_size, 3)
    anchor_input = Input(input_shape, name='anchor_input')
    positive_input = Input(input_shape, name='positive_input')
    negative_input = Input(input_shape, name='negative_input')

    anchor_embedding = embedding_model(anchor_input)
    positive_embedding = embedding_model(positive_input)
    negative_embedding = embedding_model(negative_input)

    inputs = [anchor_input, positive_input, negative_input]
    outputs = [anchor_embedding, positive_embedding, negative_embedding]

    triplet_model = Model(inputs, outputs)
    triplet_model.add_loss(K.mean(triplet_loss(outputs)))
    return embedding_model, triplet_model
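The triplet_loss function called above is not shown here; it is the standard margin-based formulation on the three embedding outputs. A sketch of that usual form (alpha is the margin; the exact details may differ from my actual code):

# Standard margin-based triplet loss on [anchor, positive, negative] embeddings.
# Sketch only: `alpha` is the margin, a placeholder value.
def triplet_loss(outputs, alpha=0.2):
    anchor, positive, negative = outputs
    pos_dist = K.sum(K.square(anchor - positive), axis=-1)
    neg_dist = K.sum(K.square(anchor - negative), axis=-1)
    return K.maximum(pos_dist - neg_dist + alpha, 0.0)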
And this is how I'm trying to run the training:
if __name__ == '__main__':
    data = pd.read_csv(path_csv)
    train, test = train_test_split(data, train_size=0.7, random_state=1337)
    file_id_mapping_train = {k: v for k, v in zip(train.Image.values, train.Id.values)}
    file_id_mapping_test = {k: v for k, v in zip(test.Image.values, test.Id.values)}

    gen_tr = gen(SampleGen(file_id_mapping_train))
    gen_te = gen(SampleGen(file_id_mapping_test))

    embedding_model, triplet_model = get_model()

    for i, layer in enumerate(embedding_model.layers):
        print(i, layer.name, layer.trainable)
    for layer in embedding_model.layers[379:]:
        layer.trainable = True
    for layer in embedding_model.layers[:379]:
        layer.trainable = False

    triplet_model.compile(loss=None, optimizer=Adam(0.0001))
    history = triplet_model.fit(x=gen_tr,
                                validation_data=gen_te,
                                epochs=10,
                                verbose=1,
                                steps_per_epoch=200,
                                validation_steps=20,
                                callbacks=create_callbacks())
The csv contains two columns (Image, Id) and I am generating triplets on the fly using a generator. Layer 379 is the last layer of the network, so I leave only that one as trainable. I let it run for some epochs and it does not seem to converge; the loss stays around 2.30, and by epoch 20 or so it is even higher than what I started with (see the training-loss plot). Is there anything wrong with the way I am thinking about the problem?
Thank you!

Tensorflow : train on mini batch, fast then slow

I am a beginner in TensorFlow and I am trying to train a model using mini-batches. To do that I created a generator and iterate over it. The problem I encounter is that, at the beginning of the epoch, training seems fast (many batches per second), then it slows down (about one batch per second), so I am wondering where I went wrong in my code, but I cannot find the problem.
def prepare_data(filename):
    '''load file which give path and label for the data'''
    f = open(filename, 'r')
    data = [line.split() for line in f]
    feat = []
    label = []
    for l in data:
        feat.append(l[0])
        label.append(l[1])
    n_samples = len(feat)
    shuf = list(range(n_samples))
    random.shuffle(shuf)
    count = Counter(label)
    print(count)
    feature = [feat[i] for i in shuf]
    label = np.array(label, dtype=np.int)
    return feature, label[shuf]
def get_specgrams(paths, nsamples=16000):
    '''
    Given list of paths, return specgrams.
    '''
    # read the wav files
    wavs = [wavfile.read(x)[1] for x in paths]
    # zero pad the shorter samples and cut off the long ones.
    data = []
    for wav in wavs:
        if wav.size < 16000:
            d = np.pad(wav, (nsamples - wav.size, 0), mode='constant')
        else:
            d = wav[0:nsamples]
        data.append(d)
    # get the specgram
    #specgram = [signal.spectrogram(d, nperseg=256, noverlap=128)[2] for d in data]
    #specgram = [s.reshape(129, 124, -1) for s in specgram]
    return np.asarray(data)
def get_specgram(path, nsamples=16000):
    '''
    Given path, return specgrams.
    '''
    # read the wav files
    wav = wavfile.read(path)[1]
    # zero pad the shorter samples and cut off the long ones.
    if wav.size < 16000:
        d = np.pad(wav, (nsamples - wav.size, 0), mode='constant')
    else:
        d = wav[0:nsamples]
    # get the specgram
    #specgram = [signal.spectrogram(d, nperseg=256, noverlap=128)[2] for d in data]
    #specgram = [s.reshape(129, 124, -1) for s in specgram]
    return d
# multi-class classification binary labels
def one_hot_encode(labels, n_unique_labels=31):
    n_labels = len(labels)
    #print('number of unique labels:', n_unique_labels)
    one_hot_encode = np.zeros((n_labels, n_unique_labels))
    one_hot_encode[np.arange(n_labels), labels] = 1
    return np.array(one_hot_encode, dtype=np.int)
#create_path_file('train/audio/')
def model(tr_features, tr_labels, ts_features, ts_labels):
    # remove gpu device error
    config = tf.ConfigProto(allow_soft_placement=True)

    # parameters
    BATCH_SIZE = 4
    number_loop = math.ceil(len(tr_features)/BATCH_SIZE)
    training_epochs = 10
    n_dim = 16000
    n_classes = 31  # len(np.unique(ts_labels))
    n_hidden_units_one = 280
    n_hidden_units_two = 300
    sd = 1 / np.sqrt(n_dim)
    learning_rate = 0.1

    # get test data
    ts_features, ts_labels = get_data(ts_features, ts_labels)

    # Model
    X = tf.placeholder(tf.float32, [None, n_dim])
    Y = tf.placeholder(tf.float32, [None, n_classes])
    W_1 = tf.Variable(tf.random_normal([n_dim, n_hidden_units_one], mean=0, stddev=sd))
    b_1 = tf.Variable(tf.random_normal([n_hidden_units_one], mean=0, stddev=sd))
    h_1 = tf.nn.tanh(tf.matmul(X, W_1) + b_1)
    W_2 = tf.Variable(tf.random_normal([n_hidden_units_one, n_hidden_units_two], mean=0, stddev=sd))
    b_2 = tf.Variable(tf.random_normal([n_hidden_units_two], mean=0, stddev=sd))
    h_2 = tf.nn.sigmoid(tf.matmul(h_1, W_2) + b_2)
    W = tf.Variable(tf.random_normal([n_hidden_units_two, n_classes], mean=0, stddev=sd))
    b = tf.Variable(tf.random_normal([n_classes], mean=0, stddev=sd))
    y_ = tf.nn.softmax(tf.matmul(h_2, W) + b)
    init = tf.initialize_all_variables()

    # function and optimizers
    cost_function = -tf.reduce_sum(Y * tf.log(y_))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function)
    correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # train loop
    cost_history = np.empty(shape=[1], dtype=float)
    y_true, y_pred = None, None
    with tf.Session(config=config) as sess:
        sess.run(init)
        for epoch in range(training_epochs):
            print(' ## Epoch n°', epoch+1)
            batch = batch_generator(BATCH_SIZE, tr_features, tr_labels)
            acc_total = 0.0
            for cpt, (train_features_batch, train_labels_batch) in enumerate(batch):
                _, cost = sess.run([optimizer, cost_function],
                                   feed_dict={X: train_features_batch, Y: train_labels_batch})
                cost_history = np.append(cost_history, cost)
                correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(Y, 1))
                accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
                acc = accuracy.eval(feed_dict={X: train_features_batch, Y: train_labels_batch})
                acc_total = (acc_total*cpt + acc)/(cpt+1)
                print('Train accuracy : ', acc_total, '[', str(cpt+1), '/', str(number_loop), ']', flush=True, end='\r')
            clear_output()
            print('Train accuracy : ', acc_total)

        y_pred = sess.run(tf.argmax(y_, 1), feed_dict={X: ts_features})
        y_true = sess.run(tf.argmax(ts_labels, 1))

        print('Test accuracy: ', round(sess.run(accuracy, feed_dict={X: ts_features, Y: ts_labels}), 3))
        fig = plt.figure(figsize=(10, 8))
        plt.plot(cost_history)
        plt.axis([0, training_epochs, 0, np.max(cost_history)])
        plt.show()

        p, r, f, s = precision_recall_fscore_support(y_true, y_pred, average='micro')
        print("F-Score:", round(f, 3))
def batch_generator(batch_size, feat_path, labels):
    n_sample = len(feat_path)
    ite = math.ceil(n_sample/batch_size)
    for i in range(0, ite):
        if i == ite-1:
            label = one_hot_encode(labels[-batch_size:])
            feat = get_specgrams(feat_path[-batch_size:])
            yield (feat, label)
        else:
            label = one_hot_encode(labels[i*batch_size:i*batch_size+batch_size])
            feat = get_specgrams(feat_path[i*batch_size:i*batch_size+batch_size])
            yield (feat, label)
def get_data(feat_path, labels):
    feat = get_specgrams(feat_path)
    label = one_hot_encode(labels)
    return feat, label
def __main__():
    print('## Load data and shuffle')
    feat_path, labels = prepare_data('data_labelised2.txt')
    idx = int(len(labels)*0.8)
    print("## Create Model")
    model(feat_path[0:idx], labels[0:idx], feat_path[idx+1:], labels[idx+1:])

with tf.device('/gpu:0'):
    __main__()

Tensorflow: Saving/importing checkpoint works without error, but all imported variables have value 'none'

I am training a deep CNN for image augmentation and have run into a very odd issue.
My network architecture is fully convolutional and implements several small "u-shaped" components, wherein feature maps are down/upsampled in order to be processed throughout a "top layer." In the top layer, there are several nodes where the network "guesses" the output image, and then adds the output of the lower layers to the features derived from the guess. The loss function I have penalizes error in the final prediction as well as these guesses.
The network is defined thusly:
def convNet(x, weights, biases):
    #TOP LAYER
    conv0_1 = conv3dWrap(x, weights['wConv0_1'], biases['bConv0_1'], [1,1,1,1,1])
    conv0_2 = conv3dWrap(conv0_1, weights['wConv0_2'], biases['bConv0_2'], [1,1,1,1,1])
    #MID LAYER DOWN SAMPLE
    conv1_1 = conv3dWrap(conv0_2, weights['wConv1_1'], biases['bConv1_1'], [1,2,2,2,1])
    conv1_2 = conv3dWrap(conv1_1, weights['wConv1_2'], biases['bConv1_2'], [1,1,1,1,1])
    #BOTTOM LAYER DOWN SAMPLE
    conv2_1 = conv3dWrap(conv1_2, weights['wConv2_1'], biases['bConv2_1'], [1,2,2,2,1])
    conv2_2 = conv3dWrap(conv2_1, weights['wConv2_2'], biases['bConv2_2'], [1,1,1,1,1])
    conv2_3 = conv3dWrap(conv2_2, weights['wConv2_3'], biases['bConv2_3'], [1,1,1,1,1])
    convTrans2_1 = conv3dTransWrap(conv2_3, weights['wTConv2_1'], biases['bTConv2_1'], [4,2,32,32,64], [1,2,2,2,1])
    #MID LAYER UPSAMPLE
    conv1_3 = conv3dWrap(tf.add(convTrans2_1, conv1_2), weights['wConv1_3'], biases['bConv1_3'], [1,1,1,1,1])
    conv1_4 = conv3dWrap(conv1_3, weights['wConv1_4'], biases['bConv1_4'], [1,1,1,1,1])
    convTrans1_1 = conv3dTransWrap(conv1_4, weights['wTConv1_1'], biases['bTConv1_1'], [4,4,64,64,32], [1,2,2,2,1])
    #TOP LAYER AGAIN
    conv0_3 = conv3dWrap(tf.add(conv0_2, convTrans1_1), weights['wConv0_3'], biases['bConv0_3'], [1,1,1,1,1])
    conv0_4 = conv3dWrap(conv0_3, weights['wConv0_4'], biases['bConv0_4'], [1,1,1,1,1])
    recon0_1 = reconWrap(conv0_3, weights['wReconDS0_1'], biases['bReconDS0_1'], [1,1,1,1,1])
    print(recon0_1.shape)
    catRecon0_1 = tf.add(conv0_4, tf.contrib.keras.backend.repeat_elements(recon0_1, 32, 4))
    conv0_5 = conv3dWrap(catRecon0_1, weights['wConv0_5'], biases['bConv0_5'], [1,1,1,1,1])
    #MID LAYER AGAIN
    conv1_5 = conv3dWrap(conv0_5, weights['wConv1_5'], biases['bConv1_5'], [1,2,2,2,1])
    conv1_6 = conv3dWrap(conv1_5, weights['wConv1_6'], biases['bConv1_6'], [1,1,1,1,1])
    #BOTTOM LAYER
    conv2_4 = conv3dWrap(conv1_6, weights['wConv2_4'], biases['bConv2_4'], [1,2,2,2,1])
    conv2_5 = conv3dWrap(conv2_4, weights['wConv2_5'], biases['bConv2_5'], [1,1,1,1,1])
    conv2_6 = conv3dWrap(conv2_5, weights['wConv2_6'], biases['bConv2_6'], [1,1,1,1,1])
    convTrans2_2 = conv3dTransWrap(conv2_6, weights['wTConv2_2'], biases['bTConv2_2'], [4,2,32,32,64], [1,2,2,2,1])
    #MID LAYER UPSAMPLE
    conv1_7 = conv3dWrap(tf.add(convTrans2_2, conv1_6), weights['wConv1_7'], biases['bConv1_7'], [1,1,1,1,1])
    conv1_8 = conv3dWrap(conv1_7, weights['wConv1_8'], biases['bConv1_8'], [1,1,1,1,1])
    convTrans1_2 = conv3dTransWrap(conv1_8, weights['wTConv1_2'], biases['bTConv1_2'], [4,4,64,64,32], [1,2,2,2,1])
    #TOP LAYER
    conv0_6 = conv3dWrap(tf.add(conv0_5, convTrans1_2), weights['wConv0_6'], biases['bConv0_6'], [1,1,1,1,1])
    recon0_2 = reconWrap(conv0_6, weights['wReconDS0_2'], biases['bReconDS0_2'], [1,1,1,1,1])
    catRecon0_2 = tf.add(conv0_6, tf.contrib.keras.backend.repeat_elements(recon0_2, 32, 4))
    conv0_7 = conv3dWrap(catRecon0_2, weights['wConv0_7'], biases['bConv0_7'], [1,1,1,1,1])
    #MID LAYER
    conv1_9 = conv3dWrap(conv0_7, weights['wConv1_9'], biases['bConv1_9'], [1,2,2,2,1])
    conv1_10 = conv3dWrap(conv1_9, weights['wConv1_10'], biases['bConv1_10'], [1,1,1,1,1])
    #BOTTOM LAYER
    conv2_7 = conv3dWrap(conv1_10, weights['wConv2_7'], biases['bConv2_7'], [1,2,2,2,1])
    conv2_8 = conv3dWrap(conv2_7, weights['wConv2_8'], biases['bConv2_8'], [1,1,1,1,1])
    conv2_9 = conv3dWrap(conv2_8, weights['wConv2_9'], biases['bConv2_9'], [1,1,1,1,1])
    convTrans2_3 = conv3dTransWrap(conv2_9, weights['wTConv2_3'], biases['bTConv2_3'], [4,2,32,32,64], [1,2,2,2,1])
    #MID LAYER UPSAMPLE
    conv1_11 = conv3dWrap(tf.add(convTrans2_3, conv1_10), weights['wConv1_11'], biases['bConv1_11'], [1,1,1,1,1])
    conv1_12 = conv3dWrap(conv1_11, weights['wConv1_12'], biases['bConv1_12'], [1,1,1,1,1])
    convTrans1_3 = conv3dTransWrap(conv1_12, weights['wTConv1_3'], biases['bTConv1_3'], [4,4,64,64,32], [1,2,2,2,1])
    #TOP LAYER
    conv0_8 = conv3dWrap(tf.add(conv0_7, convTrans1_3), weights['wConv0_8'], biases['bConv0_8'], [1,1,1,1,1])
    recon0_3 = reconWrap(conv0_8, weights['wReconDS0_3'], biases['bReconDS0_3'], [1,1,1,1,1])
    catRecon0_3 = tf.add(conv0_8, tf.contrib.keras.backend.repeat_elements(recon0_3, 32, 4))
    conv0_9 = conv3dWrap(catRecon0_3, weights['wConv0_9'], biases['bConv0_9'], [1,1,1,1,1])
    print(recon0_3.shape)
    #MID LAYER
    conv1_13 = conv3dWrap(conv0_9, weights['wConv1_13'], biases['bConv1_13'], [1,2,2,2,1])
    conv1_14 = conv3dWrap(conv1_13, weights['wConv1_14'], biases['bConv1_14'], [1,1,1,1,1])
    #BOTTOM LAYER
    conv2_10 = conv3dWrap(conv1_14, weights['wConv2_10'], biases['bConv2_10'], [1,2,2,2,1])
    conv2_11 = conv3dWrap(conv2_10, weights['wConv2_11'], biases['bConv2_11'], [1,1,1,1,1])
    conv2_12 = conv3dWrap(conv2_11, weights['wConv2_12'], biases['bConv2_12'], [1,1,1,1,1])
    convTrans2_4 = conv3dTransWrap(conv2_12, weights['wTConv2_4'], biases['bTConv2_4'], [4,2,32,32,64], [1,2,2,2,1])
    #MID LAYER UPSAMPLE
    conv1_15 = conv3dWrap(tf.add(convTrans2_4, conv1_14), weights['wConv1_15'], biases['bConv1_15'], [1,1,1,1,1])
    conv1_16 = conv3dWrap(conv1_15, weights['wConv1_16'], biases['bConv1_16'], [1,1,1,1,1])
    convTrans1_4 = conv3dTransWrap(conv1_16, weights['wTConv1_4'], biases['bTConv1_4'], [4,4,64,64,32], [1,2,2,2,1])
    #TOP LAYER
    conv0_10 = conv3dWrap(tf.add(conv0_9, convTrans1_4), weights['wConv0_10'], biases['bConv0_10'], [1,1,1,1,1])
    #OUTPUT
    convOUT = reconWrap(conv0_10, weights['wConvOUT'], biases['bConvOUT'], [1,1,1,1,1])
    print(convOUT.shape)
    return recon0_1, recon0_2, recon0_3, convOUT
Where all of the "wrappers" are as follows:
def conv3dWrap(x, W, b, strides):
    x = tf.nn.conv3d(x, W, strides, padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

def reconWrap(x, W, b, strides):
    x = tf.nn.conv3d(x, W, strides, padding='SAME')
    x = tf.nn.bias_add(x, b)
    return x

def conv3dTransWrap(x, W, b, shape, strides):
    x = tf.nn.conv3d_transpose(x, W, shape, strides, padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)
My weights and biases are stored in dictionaries that are defined before starting the training:
weights = {
    #TOP LAYER
    'wConv0_1': tf.Variable(tf.random_normal([4, 3, 3, 1, 5]), name='wC0_1'),
    'wConv0_2': tf.Variable(tf.random_normal([4, 3, 3, 5, 32]), name='wC0_2'),
    'wConv0_3': tf.Variable(tf.random_normal([4, 3, 3, 32, 32]), name='wC0_3'),
    'wConv0_4': tf.Variable(tf.random_normal([4, 3, 3, 32, 32]), name='wC0_4'),
    'wReconDS0_1': tf.Variable(tf.random_normal([1, 1, 1, 32, 1]), name='wR0_1'),
    ...  #THIS CONTINUES FOR QUITE AWHILE
Then, I begin the training like this:
def train_cnn(x):
    epochLosses = []
    print('Beginning Training!')
    print(NUM_EPOCHS)
    r1, r2, r3, pred = convNet(x, weights, biases)
    cost = (tf.losses.mean_squared_error(y, pred)
            + 0.25*((tf.losses.mean_squared_error(y, r1))
                    + (tf.losses.mean_squared_error(y, r2))
                    + (tf.losses.mean_squared_error(y, r3))))
    regularizer = (0.01*tf.nn.l2_loss(weights['wConv0_1'])+
0.01*tf.nn.l2_loss(weights['wConv0_2'])+
0.01*tf.nn.l2_loss(weights['wConv0_3'])+
0.01*tf.nn.l2_loss(weights['wConv0_4'])+
0.01*tf.nn.l2_loss(weights['wReconDS0_1'])+
0.01*tf.nn.l2_loss(weights['wConv0_5'])+
0.01*tf.nn.l2_loss(weights['wConv0_6'])+
0.01*tf.nn.l2_loss(weights['wReconDS0_2'])+
0.01*tf.nn.l2_loss(weights['wReconDS0_3'])+
0.01*tf.nn.l2_loss(weights['wConv0_7'])+
0.01*tf.nn.l2_loss(weights['wConv0_8'])+
0.01*tf.nn.l2_loss(weights['wConv0_9'])+
0.01*tf.nn.l2_loss(weights['wConv0_10'])+
0.01*tf.nn.l2_loss(weights['wConvOUT'])+
0.01*tf.nn.l2_loss(weights['wConv1_1'])+
0.01*tf.nn.l2_loss(weights['wConv1_2'])+
0.01*tf.nn.l2_loss(weights['wConv1_3'])+
0.01*tf.nn.l2_loss(weights['wConv1_4'])+
0.01*tf.nn.l2_loss(weights['wConv1_5'])+
0.01*tf.nn.l2_loss(weights['wConv1_6'])+
0.01*tf.nn.l2_loss(weights['wConv1_7'])+
0.01*tf.nn.l2_loss(weights['wConv1_8'])+
0.01*tf.nn.l2_loss(weights['wConv1_9'])+
0.01*tf.nn.l2_loss(weights['wConv1_10'])+
0.01*tf.nn.l2_loss(weights['wConv1_11'])+
0.01*tf.nn.l2_loss(weights['wConv1_12'])+
0.01*tf.nn.l2_loss(weights['wConv1_13'])+
0.01*tf.nn.l2_loss(weights['wConv1_14'])+
0.01*tf.nn.l2_loss(weights['wConv1_15'])+
0.01*tf.nn.l2_loss(weights['wConv1_16'])+
0.01*tf.nn.l2_loss(weights['wTConv1_1'])+
0.01*tf.nn.l2_loss(weights['wTConv1_2'])+
0.01*tf.nn.l2_loss(weights['wTConv1_3'])+
0.01*tf.nn.l2_loss(weights['wTConv1_4'])+
0.01*tf.nn.l2_loss(weights['wConv2_1'])+
0.01*tf.nn.l2_loss(weights['wConv2_2'])+
0.01*tf.nn.l2_loss(weights['wConv2_3'])+
0.01*tf.nn.l2_loss(weights['wConv2_4'])+
0.01*tf.nn.l2_loss(weights['wConv2_5'])+
0.01*tf.nn.l2_loss(weights['wConv2_6'])+
0.01*tf.nn.l2_loss(weights['wConv2_7'])+
0.01*tf.nn.l2_loss(weights['wConv2_8'])+
0.01*tf.nn.l2_loss(weights['wConv2_9'])+
0.01*tf.nn.l2_loss(weights['wConv2_10'])+
0.01*tf.nn.l2_loss(weights['wConv2_11'])+
0.01*tf.nn.l2_loss(weights['wConv2_12'])+
0.01*tf.nn.l2_loss(weights['wTConv2_1'])+
0.01*tf.nn.l2_loss(weights['wTConv2_2'])+
0.01*tf.nn.l2_loss(weights['wTConv2_3'])+
0.01*tf.nn.l2_loss(weights['wTConv2_4']))
    cost = cost + regularizer
    optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(cost)
    saver = tf.train.Saver()
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    valLosses = []
    epochLosses = []
    print('Beginning Session!')
    writer = tf.summary.FileWriter('./GRAPH', sess.graph)
    sess.run(tf.global_variables_initializer())
Finally, I do some work to load in the batches and, once they're ready, I run the following for each pass (I won't save on every pass once I have the weight importing working):
_, c = sess.run([optimizer, cost], feed_dict = {x: inBatch,y: gsBatch})
epoch_loss += c
save_path = saver.save(sess, "./CHKPT/model.cpkt")
So when I go ahead and import this model
sess = tf.Session()
x = tf.placeholder(dtype=tf.float32)
new_saver = tf.train.import_meta_graph('./CHKPT/model.cpkt.meta')
sess.run(tf.global_variables_initializer())
a,b,c,pred = convNet(x, weights, biases)
I am met with the following error:
ValueError: Tried to convert 'filter' to a tensor and failed. Error: None values not supported.
When I look at the imported weights and biases, each of them has the value 'None'. Not only is this odd, but the network 'runs' incredibly quickly during training, far more quickly than I'd expect, and I am worried that no legitimate computations are occurring.
That can't really be the case, but I am almost positive I am following, verbatim, the saving/loading process I've used for many other networks. Can anyone shed some light on what might be happening here?
Edit: I'm also very new to TF, and it's likely there are non-idealities in my code. If you see anything outside of the saving/importing that isn't kosher please let me know.
Running sess.run(tf.global_variables_initializer()) will reinitialize every variable and overwrite their loaded values. Skip calling tf.global_variables_initializer() when you load a model; the initialization is done by the saver.
You are also missing the restore call (import_meta_graph() only loads the saver object).
new_saver = tf.train.import_meta_graph('./CHKPT/model.cpkt.meta')
new_saver.restore(sess, './CHKPT/model.cpkt')
Thereafter when you run:
a,b,c,pred = convNet(x, weights, biases)
you create an entirely new network and never use the loaded one.
Instead, you have to find the tensors you need in tf.global_variables() after restoring the model, for example by searching for them by name.
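Putting it together, a minimal sketch of the load path (the names 'input:0' and 'output:0' below are only examples; use whatever names you gave your ops before saving):

sess = tf.Session()
new_saver = tf.train.import_meta_graph('./CHKPT/model.cpkt.meta')
new_saver.restore(sess, './CHKPT/model.cpkt')   # no global_variables_initializer here

graph = tf.get_default_graph()
# Example names only; look up the tensors by the names you assigned before saving,
# or scan tf.global_variables() / graph.get_operations() to find them.
x = graph.get_tensor_by_name('input:0')
pred = graph.get_tensor_by_name('output:0')

result = sess.run(pred, feed_dict={x: inBatch})  # inBatch: a batch you have loaded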

ctc_loss error "No valid path found."

Training a model with tf.nn.ctc_loss produces an error every time the train op is run:
tensorflow/core/util/ctc/ctc_loss_calculator.cc:144] No valid path found.
Unlike in previous questions about this function, this is not due to divergence. I have a low learning rate, and the error occurs on even the first train op.
The model is a CNN -> LSTM -> CTC. Here is the model creation code:
# Build Graph
self.videoInput = tf.placeholder(shape=(None, self.maxVidLen, 50, 100, 3), dtype=tf.float32)
self.videoLengths = tf.placeholder(shape=(None), dtype=tf.int32)
self.keep_prob = tf.placeholder(dtype=tf.float32)
self.targets = tf.sparse_placeholder(tf.int32)
self.targetLengths = tf.placeholder(shape=(None), dtype=tf.int32)
conv1 = tf.layers.conv3d(self.videoInput ...)
pool1 = tf.layers.max_pooling3d(conv1 ...)
conv2 = ...
pool2 = ...
conv3 = ...
pool3 = ...
cnn_out = tf.reshape(pool3, shape=(-1, self.maxVidLength, 4*7*96))
fw_cell = tf.nn.rnn_cell.MultiRNNCell([self.cell() for _ in range(3)])
bw_cell = tf.nn.rnn_cell.MultiRNNCell([self.cell() for _ in range(3)])
outputs, _ = tf.nn.bidirectional_dynamic_rnn(
fw_cell, bw_cell, cnn_out, sequence_length=self.videoLengths, dtype=tf.float32)
outputs = tf.concat(outputs, 2)
outputs = tf.reshape(outputs, [-1, self.hidden_size * 2])
w = tf.Variable(tf.random_normal((self.hidden_size * 2, len(self.char2index) + 1), stddev=0.2))
b = tf.Variable(tf.zeros(len(self.char2index) + 1))
out = tf.matmul(outputs, w) + b
out = tf.reshape(out, [-1, self.maxVidLen, len(self.char2index) + 1])
out = tf.transpose(out, [1, 0, 2])
cost = tf.reduce_mean(tf.nn.ctc_loss(self.targets, out, self.targetLengths))
self.train_op = tf.train.AdamOptimizer(0.0001).minimize(cost)
And here is the feed dict creation code:
indices = []
values = []
shape = [len(vids) * 2, self.maxLabelLen]
vidInput = np.zeros((len(vids) * 2, self.maxVidLen, 50, 100, 3), dtype=np.float32)

# Actual video, then left-right flip
for j in range(len(vids) * 2):
    # K is video index
    k = j if j < len(vids) else j - len(vids)
    # convert video and label to input format
    vidInput[j, 0:len(vids[k])] = vids[k] if k == j else vids[k][:, ::-1, :]
    indices.extend([j, i] for i in range(len(labelList[k])))
    values.extend(self.char2index[c] for c in labelList[k])

fd[self.targets] = (indices, values, shape)
fd[self.videoInput] = vidInput

# Collect video lengths and label lengths
vidLengths = [len(j) for j in vids] + [len(j) for j in vids]
labelLens = [len(l) for l in labelList] + [len(l) for l in labelList]
fd[self.videoLengths] = vidLengths
fd[self.targetLengths] = labelLens
It turns out that the ctc_loss requires that the label lengths be shorter than the input lengths. If the label lengths are too long, the loss calculator cannot unroll completely and therefore cannot compute the loss.
For example, the label BIFI would require input length of at least 4 while the label BIIF would require input length of at least 5 due to a blank being inserted between the repeated symbols.
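Concretely, the minimum valid input length for a label is its length plus the number of adjacent repeated symbols (one blank per repeat). A quick sanity check you can run on your own labels (a sketch, independent of the model code above):

def min_ctc_input_len(label):
    # label length plus one inserted blank for every adjacent repeated symbol
    repeats = sum(1 for a, b in zip(label, label[1:]) if a == b)
    return len(label) + repeats

print(min_ctc_input_len("BIFI"))  # 4
print(min_ctc_input_len("BIIF"))  # 5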
I had the same issue, but I soon realized it was because I was using glob and my label was taken from the filename, so the label ended up longer than it should have been.
You can fix this issue by using:
os.path.join(*(filename.split(os.path.sep)[noOfDir:]))
For me the problem was fixed by setting preprocess_collapse_repeated=True (see the sketch below).
FWIW: my target sequence lengths were already shorter than the inputs, and the RNN outputs had already been passed through a softmax.
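That flag is an argument of tf.nn.ctc_loss itself, so in the model above it would go where the loss is built (sketch):

cost = tf.reduce_mean(tf.nn.ctc_loss(self.targets, out, self.targetLengths,
                                     preprocess_collapse_repeated=True))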
Another possible cause, which I found in my case, is input data that is not normalized to the 0-1 range; the LSTM activation functions saturate at the beginning of training, which somehow produces the "No valid path found" log.
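If that is the culprit, scaling the inputs into [0, 1] before building the feed dict is usually enough. For example, assuming the video frames are raw 8-bit pixel values (an assumption about this particular pipeline):

# Sketch: bring raw 0-255 pixel values into [0, 1] before feeding the network.
vidInput = vidInput.astype(np.float32) / 255.0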

Tensorflow - apply function over 1D Tensor

I have a function dice
def dice(yPred, yTruth, thresh):
    smooth = tf.constant(1.0)
    threshold = tf.constant(thresh)
    yPredThresh = tf.to_float(tf.greater_equal(yPred, threshold))
    mul = tf.mul(yPredThresh, yTruth)
    intersection = 2*tf.reduce_sum(mul) + smooth
    union = tf.reduce_sum(yPredThresh) + tf.reduce_sum(yTruth) + smooth
    dice = intersection/union
    return dice, yPredThresh
which works. An example is given here
with tf.Session() as sess:
    thresh = 0.5
    print("Dice example")
    yPred = tf.constant([0.1, 0.9, 0.7, 0.3, 0.1, 0.1, 0.9, 0.9, 0.1], shape=[3, 3])
    yTruth = tf.constant([0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], shape=[3, 3])
    diceScore, yPredThresh = dice(yPred=yPred, yTruth=yTruth, thresh=thresh)
    diceScore_, yPredThresh_, yPred_, yTruth_ = sess.run([diceScore, yPredThresh, yPred, yTruth])
    print("\nScore = {0}".format(diceScore_))
>>> Score = 0.899999976158
I would like to be able to loop over the third argument of dice, thresh. I do not know the best way to do this such that I can still extract the scores from the graph. Something along the lines of the following...
def diceROC(yPred, yTruth, thresholds=np.linspace(0.1, 0.9, 20)):
    thresholds = thresholds.astype(np.float32)
    nThreshs = thresholds.size
    diceScores = tf.zeros(shape=nThreshs)
    for i in xrange(nThreshs):
        score, _ = dice(yPred, yTruth, thresholds[i])
        diceScores[i] = score
    return diceScores
Evaluating diceROC yields the error 'Tensor' object does not support item assignment, since apparently I can't index into and assign to a tf tensor like that.
Instead of the loop, I would encourage you to use the broadcasting abilities of tensorflow. If you redefine dice to:
def dice(yPred, yTruth, thresh):
    smooth = tf.constant(1.0)
    yPredThresh = tf.to_float(tf.greater_equal(yPred, thresh))
    mul = tf.mul(yPredThresh, yTruth)
    intersection = 2*tf.reduce_sum(mul, [0, 1]) + smooth
    union = tf.reduce_sum(yPredThresh, [0, 1]) + tf.reduce_sum(yTruth, [0, 1]) + smooth
    dice = intersection/union
    return dice, yPredThresh
You will be able to pass 3-dimensional yPred and yTruth (assuming the tensors will be just repeated along the last dimension) and 1-dimensional thresh:
with tf.Session() as sess:
    thresh = [0.1, 0.9, 20, 0.5]
    print("Dice example")
    yPred = tf.constant([0.1, 0.9, 0.7, 0.3, 0.1, 0.1, 0.9, 0.9, 0.1], shape=[3, 3, 1])
    ypred_tiled = tf.tile(yPred, [1, 1, 4])
    yTruth = tf.constant([0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], shape=[3, 3, 1])
    ytruth_tiled = tf.tile(yTruth, [1, 1, 4])
    diceScore, yPredThresh = dice(yPred=ypred_tiled, yTruth=ytruth_tiled, thresh=thresh)
    diceScore_ = sess.run(diceScore)
    print("\nScore = {0}".format(diceScore_))
You'll get:
Score = [ 0.73333335 0.77777779 0.16666667 0.89999998]