TensorFlow: string_input_producer output string not in order with shuffle = False - tensorflow

I have two lists of filenames, e.g. filenames_L = [1a,2a,3a,4a,...] and filenames_R = [1b,2b,3b,4b,...], and I use the code below to create two queues.
"""queue for left images"""
filenames_L = reader.file_name('stereo_dataset/fly_frames_cleanpass/TRAIN', 'Left', 'png')
png = filenames_L[0].lower().endswith('png') # If first file is a png, assume they all are
filenames_L = tf.convert_to_tensor(filenames_L)
filename_queue_L = tf.train.string_input_producer(filenames_L, shuffle=False, num_epochs=FLAGS.epoch)
reader_L = tf.WholeFileReader()
name_L, img_bytes_L = reader_L.read(filename_queue_L)
image_L = tf.image.decode_png(img_bytes_L, channels=3) if png else tf.image.decode_jpeg(img_bytes_L, channels=3)
processed_image_L = image_preprocessing_fn(image_L, FLAGS.height, FLAGS.width)
processed_images_L = tf.train.batch([processed_image_L], FLAGS.batch_size, dynamic_pad=True)
"""queue for right images"""
filenames_R = reader.file_name('stereo_dataset/fly_frames_cleanpass/TRAIN', 'Right', 'png')
filenames_R = tf.convert_to_tensor(filenames_R)
filename_queue_R = tf.train.string_input_producer(filenames_R, shuffle=False, num_epochs=FLAGS.epoch)
reader_R = tf.WholeFileReader()
name_R, img_bytes_R = reader_R.read(filename_queue_R)
image_R = tf.image.decode_png(img_bytes_R, channels=3) if png else tf.image.decode_jpeg(img_bytes_R, channels=3)
processed_image_R = image_preprocessing_fn(image_R, FLAGS.height, FLAGS.width)
processed_images_R = tf.train.batch([processed_image_R], FLAGS.batch_size, dynamic_pad=True)
And then I use the code below to get their names.
# Run the training loop until the input queues are exhausted.
with tf.Session(config=config) as sess:
    # Local variables must be initialised too: `num_epochs` on the input
    # producer is stored as a local variable.
    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    start_time = time.time()
    try:
        while not coord.should_stop():
            _, loss_t, step, name_Left, name_Right = sess.run(
                [train_op, loss, global_step, name_L, name_R],
                feed_dict={disparity_map: disparity})
    except tf.errors.OutOfRangeError:
        # Raised by the queues once the requested number of epochs is done;
        # this is the normal way for the loop to end.
        pass
    finally:
        # Always stop and join the queue-runner threads so the process exits cleanly.
        coord.request_stop()
        coord.join(threads)
What I get from name_Left and name_Right is (3a, 3b), (5a, 5b), ... but I expected the output to be (1a, 1b), (2a, 2b), ...

I had a similar issue using tf.train.batch and tf.train.shuffle_batch.
The problem was in the use of threads (similar to this: https://github.com/tensorflow/tensorflow/issues/410).
I solved it by setting only one thread. When I did that, the order was preserved.

Related

How to concat laserembeddings with huggingface funnel transformers simple CLS output for NLP sequence classification task?

I am approaching an NLP sequence classification problem (3 classes) using Hugging Face transformers (funnel-transformer/large) and TensorFlow.
First I created the LASER embeddings like this:
from laserembeddings import Laser

# Embed every row of the `text` column with LASER and cache the result on disk.
laser = Laser()
df = pd.read_csv("mycsv.csv")
sentences = df['text'].values
embeds = laser.embed_sentences(sentences, lang='en')
write_pickle_to_file('train.pkl', embeds)
part 1 : Tensorflow version
for data preparation i use code like below :
# Join the three text columns with the tokenizer's separator token.
df['text'] = temp['column1'] + tokenizer.sep_token + temp['column2'] + tokenizer.sep_token + temp['column3']


def encode_text(texts):
    """Tokenize *texts* to fixed-length arrays.

    Every sequence is padded/truncated to ``cfg.max_len``.

    Returns:
        A list ``[input_ids, attention_mask, token_type_ids]`` of int64
        NumPy arrays.
    """
    # `padding='max_length'` already requests fixed-length padding; the
    # deprecated `pad_to_max_length=True` duplicate has been dropped.
    enc_di = tokenizer.batch_encode_plus(
        texts,
        padding='max_length',
        truncation=True,
        return_token_type_ids=True,
        max_length=cfg.max_len,
    )
    return [np.asarray(enc_di['input_ids'], dtype=np.int64),
            np.asarray(enc_di['attention_mask'], dtype=np.int64),
            np.asarray(enc_di['token_type_ids'], dtype=np.int64)]
then inside training function :
# Build the tf.data training pipeline: token features + LASER features -> one-hot label.
x_train = encode_text(df.text.to_list())

# NOTE(review): the LASER columns are read from `train`, everything else from
# `df` - confirm both frames hold the same rows in the same order.
laser_features = np.array(train[laser_columns].values, dtype=np.float32)  # laser_columns contains all the laser embedded columns
features = {
    "input_ids": x_train[0],
    "input_masks": x_train[1],
    "input_segments": x_train[2],
    "lasers": laser_features,
}
targets = tf.one_hot(df["label"].to_list(), 3)  # 3 class

train_ds = tf.data.Dataset.from_tensor_slices((features, targets))
train_ds = train_ds.repeat().shuffle(2048)
train_ds = train_ds.batch(cfg.batch_size).prefetch(AUTO)
i add laser embedding in my model like this :
def create_model():
    """Build and compile the transformer + LASER classifier.

    The first-position hidden state of the transformer is concatenated with
    a dense projection of the LASER vector and fed to a 3-way softmax head.

    Returns:
        The compiled Keras model.
    """
    max_len = 512
    transformer = transformers.TFAutoModel.from_pretrained(cfg.pretrained, config=config, from_pt=True)

    # transformer branch
    input_ids = Input(shape=(max_len,), dtype="int32", name="input_ids")
    input_masks = Input(shape=(max_len,), dtype="int32", name="input_masks")
    input_segments = Input(shape=(max_len,), dtype="int32", name="input_segments")
    transformer_outputs = transformer(
        input_ids, attention_mask=input_masks, token_type_ids=input_segments)
    sequence_output = transformer_outputs[0]
    cls_token = sequence_output[:, 0, :]

    # LASER branch
    lasers = Input(shape=(n_lasers,), dtype=tf.float32, name="lasers")  # n_lasers = 1024
    lasers_output = tf.keras.layers.Dense(n_lasers, activation='tanh')(lasers)

    # classification head on the concatenated features
    merged = tf.keras.layers.Concatenate()([cls_token, lasers_output])
    merged = tf.keras.layers.Dropout(0.1)(merged)
    hidden = tf.keras.layers.Dense(2048, activation='tanh')(merged)
    hidden = tf.keras.layers.Dropout(0.1)(hidden)
    out = tf.keras.layers.Dense(3, activation='softmax')(hidden)

    model = Model(inputs=[input_ids, input_masks, input_segments, lasers], outputs=out)
    model.compile(
        Adam(lr=1e-5),
        loss=losses.CategoricalCrossentropy(),
        metrics=["acc", metrics.CategoricalCrossentropy(name='xentropy')],
    )
    return model
Now my question is: how do we do the same with PyTorch for the exact same problem and the same dataset?
part 2 : pytorch version
df = pd.read_csv("mytrain.csv")
class myDataset(Dataset):
    """Dataset that tokenizes one row of *df* per item.

    ``column1``..``column3`` are joined with spaces and encoded as the first
    segment; ``column4`` is encoded as the second segment. When *training*
    is true, the row's ``label`` is returned under the ``target`` key.
    """

    def __init__(self, df, max_length, tokenizer, training=True):
        self.df = df
        self.max_len = max_length
        self.tokenizer = tokenizer
        self.training = training
        # Cache the column arrays once instead of indexing the frame per item.
        self.column1 = self.df['column1'].values
        self.column2 = self.df['column2'].values
        self.column3 = self.df['column3'].values
        self.column4 = self.df['column4'].values
        if self.training:
            self.targets = self.df['label'].values

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        text0 = self.column4[index]
        text1 = ' '.join(
            (self.column1[index], self.column2[index], self.column3[index]))
        inputs = self.tokenizer.encode_plus(
            text1,
            text0,
            truncation=True,
            add_special_tokens=True,
            return_token_type_ids=True,
            is_split_into_words=False,
            max_length=self.max_len,
        )
        samples = {key: inputs[key] for key in ('input_ids', 'attention_mask')}
        if 'token_type_ids' in inputs:
            samples['token_type_ids'] = inputs['token_type_ids']
        if self.training:
            samples['target'] = self.targets[index]
        return samples
collate_fn = DataCollatorWithPadding(tokenizer=CONFIG['tokenizer'])
class myModel(nn.Module):
    """Pretrained encoder followed by a 3-way linear classification head."""

    def __init__(self, model_name):
        super().__init__()
        self.model = AutoModel.from_pretrained(model_name)
        print("using gradient_checkpoint...")
        self.model.gradient_checkpointing_enable()

        # NOTE(review): this config is created after the model was loaded and is
        # never passed to it, so the overrides below only affect `self.config`
        # (used for `hidden_size`) - confirm whether they were meant to reach
        # the encoder.
        overrides = {
            "output_hidden_states": True,
            "hidden_dropout_prob": 0.0,
            "layer_norm_eps": 1e-7,
            "add_pooling_layer": False,
            "attention_probs_dropout_prob": 0.0,
        }
        self.config = AutoConfig.from_pretrained(model_name)
        self.config.update(overrides)
        self.fc = nn.Linear(self.config.hidden_size, 3)

    def forward(self, ids, mask):
        """Return the class logits for a batch of token ids and attention masks."""
        encoded = self.model(input_ids=ids, attention_mask=mask, output_hidden_states=False)
        # First output, first sequence position (the CLS token).
        first_token_state = encoded[0][:, 0, :]
        return self.fc(first_token_state)
and in train and validation loop i have code like this :
# One pass over the dataloader: move the batch to the device, then run the
# forward pass and loss under mixed precision.
bar = tqdm(enumerate(dataloader), total=len(dataloader))
for step, data in bar:
    ids, mask, targets = (
        data[key].to(device, dtype=torch.long)
        for key in ('input_ids', 'attention_mask', 'target')
    )
    batch_size = ids.size(0)

    optimizer.zero_grad()
    # forward pass with `autocast` context manager
    with autocast(enabled=True):
        outputs = model(ids, mask)
        loss = loss_fct(outputs, targets)
I would like to know where and how in my Hugging Face PyTorch pipeline I can use the LASER embeddings that I created earlier and used in the TensorFlow model.
I would like to concatenate the LASER embeddings with the funnel transformer's CLS token output and train the transformer with the LASER embedding as an extra feature, exactly as in the TensorFlow example above (which works well). How should I modify my PyTorch code to do this? Your help is highly appreciated; thanks in advance.

Resource exhausted: OOM when calling model.fit in a for loop (grid search with cross validation)

I am trying to do a grid search by calling model.fit repeatedly, in a loop, for different parameters of my model.
I get a "resource exhausted" error in TensorFlow, in spite of calling del model and tf.keras.backend.clear_session() at the end of each iteration. This is my code:
def _natural_key(name):
    """Sort key that orders digit runs numerically and other characters lexically."""
    # Tagged tuples keep ints and strings from ever being compared directly.
    return [(0, int(tok), '') if tok.isdigit() else (1, 0, tok)
            for tok in re.findall(r'[^0-9]|[0-9]+', name)]


def kfoldsplit(FRAME_PATH, MASK_PATH, k):
    """Split the frames into *k* training folds plus a held-out test set.

    Frames are shuffled with a fixed seed (230); the first 80% form the
    training pool and the rest the test set. A mask belongs to the frame
    named ``'image_' + mask[6:16] + 'dcm'``.

    Each fold is ``(frames, masks)`` with ``masks[i]`` being the mask of
    ``frames[i]``. Previously the masks stayed in sorted order while the
    frames were shuffled, so the two lists of a fold did not correspond.
    Training frames without a mask are left out of the folds.

    Args:
        FRAME_PATH: directory containing the frame files.
        MASK_PATH: directory containing the mask files.
        k: number of folds.

    Returns:
        ``(kfold, (test_frames, test_masks))`` where ``kfold`` is a list of
        *k* ``(frames, masks)`` tuples. ``test_masks`` follows the order of
        ``test_frames`` (frames without a mask contribute no entry).
    """
    all_frames = sorted(os.listdir(FRAME_PATH), key=_natural_key)
    all_masks = sorted(os.listdir(MASK_PATH), key=_natural_key)

    # NOTE: reseeds the global `random` generator, as the original did.
    random.seed(230)
    random.shuffle(all_frames)

    # Generate train and test sets for frames
    train_split = int(0.8 * len(all_frames))
    train_frames = all_frames[:train_split]
    test_frames = all_frames[train_split:]

    # Look masks up by the frame name they belong to, so that the mask lists
    # follow the (shuffled) frame order.
    mask_for_frame = {'image_' + m[6:16] + 'dcm': m for m in all_masks}
    train_pairs = [(f, mask_for_frame[f]) for f in train_frames if f in mask_for_frame]
    test_masks = [mask_for_frame[f] for f in test_frames if f in mask_for_frame]

    size_of_subset = int(len(train_pairs) / k)
    kfold = []
    for i in range(k):
        chunk = train_pairs[i * size_of_subset:(i + 1) * size_of_subset]
        kfold.append(([f for f, _ in chunk], [m for _, m in chunk]))
    return kfold, (test_frames, test_masks)
def get_model_name(k):
    """Return the checkpoint file name used for fold number *k*."""
    return f'model_{k}.hdf5'
def float_range(start, stop, step):
    """Yield floats from *start* (inclusive) to *stop* (exclusive) in *step* increments.

    The running value is accumulated as a ``Decimal`` built from the decimal
    string of each argument, so ``float_range(0, 1, 0.1)`` yields exactly
    ``0.0, 0.1, ..., 0.9`` without binary rounding drift. A float *start*
    is supported (``float += Decimal`` used to raise ``TypeError``).

    Raises:
        ValueError: if *step* is not positive (the loop would never end).
    """
    increment = decimal.Decimal(str(step))
    if increment <= 0:
        raise ValueError("step must be positive")
    current = decimal.Decimal(str(start))
    limit = decimal.Decimal(str(stop))
    while current < limit:
        yield float(current)
        current += increment
# Directories holding the input frames and their masks.
frames_path = 'C:/Datasets/elderlymen1/2d/images'
masks_path = 'C:/Datasets/elderlymen1/2d/FASCIA_FILLED'
# 10 folds; kf is (list_of_folds, (test_frames, test_masks)).
kf = kfoldsplit(frames_path, masks_path, 10)
def crossvalidation(epoch, kf, loops):
    """Grid-search ``alpha`` and the learning rate, training one model per fold.

    For every (alpha, learning-rate) pair a fresh U-Net is trained on each
    fold in ``kf[0]`` and evaluated on the frames in ``kf[1][0]``. The best
    checkpoint of each run is reloaded before evaluation. The collected
    accuracies and parameters are printed and appended to ``metrics.txt``.

    Args:
        epoch: number of epochs for every ``model.fit`` call.
        kf: result of ``kfoldsplit``: ``(folds, (test_frames, test_masks))``.
        loops: unused; kept for backward compatibility.
    """
    import gc  # local import: only needed to force collection between runs

    VALIDATION_ACCURACY = []
    VALIDATION_LOSS = []
    Params = []
    save_dir = 'C:/saved_models/'
    fold_var = 1

    for _alpha in float_range(0, 1, 0.1):
        for lrate in float_range(1e-6, 1e-3, 1e-6):
            Params.append([_alpha, lrate])
            for subset in kf[0]:
                list_IDs = subset[0]
                train_data_generator = DataGenerator2(list_IDs, frames_path, masks_path, to_fit=True, batch_size=2,
                                                      dim=(512, 512), dimy=(512, 512), n_channels=1, n_classes=2, shuffle=True,
                                                      data_gen_args=data_gen_args_dict)
                list_IDs = kf[1][0]
                valid_data_generator = DataGenerator(list_IDs, frames_path, masks_path, to_fit=True, batch_size=2,
                                                     dim=(512, 512), dimy=(512, 512), n_channels=1, n_classes=2, shuffle=True)

                # CREATE AND COMPILE A NEW MODEL
                model = unet(pretrained_weights='csa/unet_ThighOuterSurface.hdf5')
                model.compile(optimizer=Adam(lr=lrate), loss=combo_loss(alpha=_alpha, beta=0.4), metrics=[dice_accuracy])

                # CREATE CALLBACKS
                # val_loss must be MINIMISED; mode='max' kept the worst epoch.
                checkpoint_path = save_dir + get_model_name(fold_var)
                checkpoint = tf.keras.callbacks.ModelCheckpoint(checkpoint_path,
                                                                monitor='val_loss', verbose=1,
                                                                save_best_only=True, mode='min')
                callbacks_list = [checkpoint]

                # FIT THE MODEL
                history = model.fit(train_data_generator, validation_steps=len(valid_data_generator),
                                    steps_per_epoch=len(train_data_generator),
                                    epochs=epoch,
                                    callbacks=callbacks_list,
                                    validation_data=valid_data_generator)

                # LOAD BEST MODEL to evaluate the performance of the model
                model.load_weights(checkpoint_path)
                results = model.evaluate(valid_data_generator)
                results = dict(zip(model.metrics_names, results))
                VALIDATION_ACCURACY.append(results['dice_accuracy'])
                VALIDATION_LOSS.append(results['loss'])

                # Release everything that references the finished model before
                # the next run; without the explicit collection the memory was
                # not given back and the loop ran out of resources.
                del model, history, checkpoint, callbacks_list
                del train_data_generator, valid_data_generator
                tf.keras.backend.clear_session()
                gc.collect()
                fold_var += 1

    print(VALIDATION_ACCURACY)
    print(Params)
    # Append, creating the file if needed ('+r' required an existing file and
    # overwrote it from the start); the context manager closes it on errors too.
    with open('metrics.txt', 'a') as sample:
        print(VALIDATION_ACCURACY, file=sample)
        print(Params, file=sample)
        print('...', file=sample)
crossvalidation(15,kf, 2)
Why is the memory still exhausted, and how can I release it? If that is not possible, is there another way to do a grid search with cross validation for an image segmentation model?
Thank you
After trying everything I could find to release the memory, the only thing that solved the problem was adding
del model
gc.collect()
at the end of the for loop

Triplet-Loss using pre-trained network

I am trying to use the triplet-loss technique to fine-tune an EfficientNet network for human Re-ID using Keras. Here is the code I am using:
This is the generator:
class SampleGen(object):
    """Random triplet sampler over a ``{file: class}`` mapping."""

    def __init__(self, file_class_mapping):
        """Index the files by class and compute per-class sampling weights.

        Args:
            file_class_mapping: dict mapping a file name to its class id.
        """
        self.file_class_mapping = file_class_mapping
        self.class_to_list_files = defaultdict(list)
        self.list_all_files = list(file_class_mapping.keys())
        self.range_all_files = list(range(len(self.list_all_files)))
        for file, class_ in file_class_mapping.items():
            self.class_to_list_files[class_].append(file)
        self.list_classes = list(set(self.file_class_mapping.values()))
        self.range_list_classes = range(len(self.list_classes))
        # A class is picked with probability proportional to its file count.
        self.class_weight = np.array([len(self.class_to_list_files[class_]) for class_ in self.list_classes])
        self.class_weight = self.class_weight / np.sum(self.class_weight)

    def get_sample(self):
        """Draw one triplet.

        Returns:
            ``(positive_example_1, negative_example, positive_example_2)``:
            two files of one class and one file of a different class. The
            two positives are distinct files whenever the class has at
            least two files.

        Raises:
            ValueError: if the mapping has fewer than two classes, because
                no negative example exists (this used to loop forever).
        """
        if len(self.list_classes) < 2:
            raise ValueError("at least two classes are required to sample a negative example")

        class_idx = np.random.choice(self.range_list_classes, 1, p=self.class_weight)[0]
        class_files = self.class_to_list_files[self.list_classes[class_idx]]
        # Sample without replacement so anchor and positive are different
        # images; an (x, x) pair has zero distance and teaches nothing.
        # Fall back to replacement only for single-file classes.
        idx_1, idx_2 = np.random.choice(len(class_files), 2, replace=len(class_files) < 2)
        positive_example_1 = class_files[idx_1]
        positive_example_2 = class_files[idx_2]

        # Rejection-sample a file of any other class.
        positive_class = self.file_class_mapping[positive_example_1]
        negative_example = None
        while negative_example is None or self.file_class_mapping[negative_example] == positive_class:
            negative_example_idx = np.random.choice(self.range_all_files, 1)[0]
            negative_example = self.list_all_files[negative_example_idx]
        return positive_example_1, negative_example, positive_example_2
def read_and_resize(filepath):
    """Load *filepath* as RGB, resize it to ``image_size`` x ``image_size``
    and return it as a float32 array."""
    target_size = (image_size, image_size)
    resized = Image.open(filepath).convert('RGB').resize(target_size)
    return np.array(resized, dtype="float32")
def augment(im_array):
    """Return *im_array* flipped left-right when a uniform draw exceeds 0.9,
    otherwise return it unchanged."""
    flip = np.random.uniform(0, 1) > 0.9
    return np.fliplr(im_array) if flip else im_array
def gen(triplet_gen):
    """Endlessly yield batches of preprocessed triplets.

    Each batch is ``(inputs, None)`` where ``inputs`` maps ``anchor_input``,
    ``positive_input`` and ``negative_input`` to arrays of ``batch_size``
    images. The label is ``None``.
    """
    def _load(name):
        # read from disk, resize, then randomly flip
        return augment(read_and_resize(join(path_train, name)))

    while True:
        anchors, negatives, positives = [], [], []
        for _ in range(batch_size):
            first_positive, negative, second_positive = triplet_gen.get_sample()
            anchors.append(_load(first_positive))
            negatives.append(_load(negative))
            positives.append(_load(second_positive))

        A = preprocess_input(np.array(anchors))
        B = preprocess_input(np.array(positives))
        C = preprocess_input(np.array(negatives))
        yield {'anchor_input': A, 'positive_input': B, 'negative_input': C}, None
This is how I create the model:
def get_model():
    """Build the shared embedding network and the three-input triplet model.

    Returns:
        ``(embedding_model, triplet_model)``. The triplet model applies the
        same embedding network to anchor, positive and negative inputs and
        has the mean triplet loss attached through ``add_loss``.
    """
    # Frozen ImageNet backbone.
    base_model = efn.EfficientNetB3(weights='imagenet', include_top=False)
    for layer in base_model.layers:
        layer.trainable = False

    # NOTE(review): with include_top=False and no pooling argument the base
    # output is presumably a 4-D feature map, so the Dense layer and the
    # axis=1 normalisation would not act on a flat embedding vector -
    # confirm whether a global pooling layer is missing here.
    features = Dropout(0.6)(base_model.output)
    features = Dense(embedding_dim)(features)
    features = Lambda(lambda x: K.l2_normalize(x, axis=1), name="enc_out")(features)
    embedding_model = Model(base_model.input, features, name="embedding")

    input_shape = (image_size, image_size, 3)
    anchor_input = Input(input_shape, name='anchor_input')
    positive_input = Input(input_shape, name='positive_input')
    negative_input = Input(input_shape, name='negative_input')

    inputs = [anchor_input, positive_input, negative_input]
    # The same embedding network (shared weights) is applied to all three inputs.
    outputs = [embedding_model(branch) for branch in inputs]

    triplet_model = Model(inputs, outputs)
    triplet_model.add_loss(K.mean(triplet_loss(outputs)))
    return embedding_model, triplet_model
And this is how I'm trying to run the training:
if __name__ == '__main__':
    # 70/30 split of the (Image, Id) table; triplets are sampled on the fly.
    data = pd.read_csv(path_csv)
    train, test = train_test_split(data, train_size=0.7, random_state=1337)
    file_id_mapping_train = dict(zip(train.Image.values, train.Id.values))
    file_id_mapping_test = dict(zip(test.Image.values, test.Id.values))
    gen_tr = gen(SampleGen(file_id_mapping_train))
    gen_te = gen(SampleGen(file_id_mapping_test))

    embedding_model, triplet_model = get_model()
    for i, layer in enumerate(embedding_model.layers):
        print(i, layer.name, layer.trainable)

    # Train only the layers from index 379 onwards; freeze everything before.
    first_trainable = 379
    for layer in embedding_model.layers[first_trainable:]:
        layer.trainable = True
    for layer in embedding_model.layers[:first_trainable]:
        layer.trainable = False

    # The loss was attached with add_loss, hence loss=None here.
    triplet_model.compile(loss=None, optimizer=Adam(0.0001))
    fit_options = dict(
        validation_data=gen_te,
        epochs=10,
        verbose=1,
        steps_per_epoch=200,
        validation_steps=20,
        callbacks=create_callbacks(),
    )
    history = triplet_model.fit(x=gen_tr, **fit_options)
The CSV contains two columns (Image, Id), and I generate the triplets on the fly using a generator. Layer 379 is the last layer of the network, so I leave only that one trainable. I let it run for some epochs and it does not seem to converge: the loss stays around 2.30. Around epoch 20 the loss is even higher than where it started. Here you can see what I mean: train example. Is there anything wrong with the way I am thinking about the problem?
Thank you!

Tensorflow : train on mini batch, fast then slow

I am a beginner in TensorFlow and I am trying to train a model using mini-batches. To do that I created a generator and iterate over it. The problem I encounter is that at the beginning of the epoch the training is fast (many batches per second), and then it slows down (about 1 batch per second). I am wondering where my code is wrong, but I cannot find the problem.
def prepare_data(filename):
    """Load a whitespace-separated ``path label`` file and shuffle it.

    Blank lines are ignored. The label histogram is printed.

    Args:
        filename: text file with one ``<path> <integer label>`` pair per line.

    Returns:
        ``(feature, label)``: the list of paths and an integer NumPy array of
        labels, both in the same random order.
    """
    # The context manager closes the file (it used to be left open).
    with open(filename, 'r') as f:
        rows = [line.split() for line in f if line.strip()]
    feat = [row[0] for row in rows]
    label = [row[1] for row in rows]

    shuf = list(range(len(feat)))
    random.shuffle(shuf)
    count = Counter(label)
    print(count)

    feature = [feat[i] for i in shuf]
    # `np.int` was removed from NumPy; the builtin `int` is the same dtype.
    label = np.array(label, dtype=int)
    return feature, label[shuf]
def get_specgrams(paths, nsamples=16000):
    '''
    Given a list of wav paths, return their samples as one array.

    Every clip is brought to exactly `nsamples` samples: shorter clips are
    zero-padded at the front, longer ones are cut off at the end. The length
    test now uses `nsamples` (it was hard-coded to 16000, which broke any
    other value).

    NOTE(review): the length check uses `wav.size`, so this presumably
    expects mono clips - confirm.

    Returns an array with one row per path.
    '''
    data = []
    for path in paths:
        # wavfile.read returns (rate, samples); only the samples are used
        wav = wavfile.read(path)[1]
        if wav.size < nsamples:
            d = np.pad(wav, (nsamples - wav.size, 0), mode='constant')
        else:
            d = wav[0:nsamples]
        data.append(d)
    return np.asarray(data)
def get_specgram(path, nsamples=16000):
    '''
    Given one wav path, return its samples with length exactly `nsamples`.

    Shorter clips are zero-padded at the front, longer ones are cut off at
    the end. The length test now uses `nsamples` (it was hard-coded to
    16000, which broke any other value).
    '''
    # wavfile.read returns (rate, samples); only the samples are used
    wav = wavfile.read(path)[1]
    if wav.size < nsamples:
        return np.pad(wav, (nsamples - wav.size, 0), mode='constant')
    return wav[0:nsamples]
# multi-class classification: binary (one-hot) labels
def one_hot_encode(labels, n_unique_labels=31):
    """Return the one-hot encoding of *labels*.

    Args:
        labels: sequence of integer class ids in ``[0, n_unique_labels)``.
        n_unique_labels: number of classes (width of the result).

    Returns:
        Integer array of shape ``(len(labels), n_unique_labels)`` with a
        single 1 per row.
    """
    n_labels = len(labels)
    label_idx = np.asarray(labels)
    if label_idx.size == 0:
        # An empty list becomes a float array, which cannot be used as an index.
        label_idx = label_idx.astype(int)
    # `np.int` was removed from NumPy; the builtin `int` is the same dtype.
    encoded = np.zeros((n_labels, n_unique_labels), dtype=int)
    encoded[np.arange(n_labels), label_idx] = 1
    return encoded
#create_path_file('train/audio/')
def model(tr_features, tr_labels, ts_features, ts_labels):
    """Train a two-hidden-layer network on mini-batches and report test metrics.

    The whole graph (including the accuracy ops) is built ONCE, before the
    session starts. The original re-created `correct_prediction` and
    `accuracy` for every batch, which added new nodes to the graph on each
    step and made training progressively slower.

    Args:
        tr_features: paths of the training clips.
        tr_labels: integer labels of the training clips.
        ts_features: paths of the test clips.
        ts_labels: integer labels of the test clips.
    """
    # remove gpu device error
    config = tf.ConfigProto(allow_soft_placement=True)

    # parameters
    BATCH_SIZE = 4
    number_loop = math.ceil(len(tr_features) / BATCH_SIZE)
    training_epochs = 10
    n_dim = 16000
    n_classes = 31  # len(np.unique(ts_labels))
    n_hidden_units_one = 280
    n_hidden_units_two = 300
    sd = 1 / np.sqrt(n_dim)
    learning_rate = 0.1

    # get test data
    ts_features, ts_labels = get_data(ts_features, ts_labels)

    # Model
    X = tf.placeholder(tf.float32, [None, n_dim])
    Y = tf.placeholder(tf.float32, [None, n_classes])
    W_1 = tf.Variable(tf.random_normal([n_dim, n_hidden_units_one], mean=0, stddev=sd))
    b_1 = tf.Variable(tf.random_normal([n_hidden_units_one], mean=0, stddev=sd))
    h_1 = tf.nn.tanh(tf.matmul(X, W_1) + b_1)
    W_2 = tf.Variable(tf.random_normal([n_hidden_units_one, n_hidden_units_two], mean=0, stddev=sd))
    b_2 = tf.Variable(tf.random_normal([n_hidden_units_two], mean=0, stddev=sd))
    h_2 = tf.nn.sigmoid(tf.matmul(h_1, W_2) + b_2)
    W = tf.Variable(tf.random_normal([n_hidden_units_two, n_classes], mean=0, stddev=sd))
    b = tf.Variable(tf.random_normal([n_classes], mean=0, stddev=sd))
    y_ = tf.nn.softmax(tf.matmul(h_2, W) + b)

    # function and optimizers
    cost_function = -tf.reduce_sum(Y * tf.log(y_))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function)
    predicted_class = tf.argmax(y_, 1)
    correct_prediction = tf.equal(predicted_class, tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    init = tf.global_variables_initializer()

    # train loop
    cost_history = []  # one entry per training step
    y_true, y_pred = None, None
    with tf.Session(config=config) as sess:
        sess.run(init)
        for epoch in range(training_epochs):
            print(' ## Epoch n°', epoch+1 )
            batch = batch_generator(BATCH_SIZE, tr_features, tr_labels)
            acc_total = 0.0
            for cpt, (train_features_batch, train_labels_batch) in enumerate(batch):
                feed = {X: train_features_batch, Y: train_labels_batch}
                _, cost = sess.run([optimizer, cost_function], feed_dict=feed)
                cost_history.append(cost)
                # Re-use the accuracy op built above; no new graph nodes here.
                acc = sess.run(accuracy, feed_dict=feed)
                acc_total = (acc_total*cpt + acc)/(cpt+1)
                print('Train accuracy : ', acc_total, '[',str(cpt+1), '/',str(number_loop), ']' ,flush=True, end='\r')
            clear_output()
            print('Train accuracy : ', acc_total)

        y_pred = sess.run(predicted_class, feed_dict={X: ts_features})
        # The test labels are already a NumPy array; no graph op is needed.
        y_true = np.argmax(ts_labels, 1)
        print('Test accuracy: ', round(sess.run(accuracy, feed_dict={X: ts_features, Y: ts_labels}) , 3))

    fig = plt.figure(figsize=(10,8))
    plt.plot(cost_history)
    # The x axis counts training steps, so scale it to the history length.
    plt.axis([0, len(cost_history), 0, np.max(cost_history)])
    plt.show()
    p,r,f,s = precision_recall_fscore_support(y_true, y_pred, average='micro')
    print("F-Score:", round(f,3))
def batch_generator(batch_size, feat_path, labels):
    """Yield ``(features, one_hot_labels)`` mini-batches over the data.

    ``ceil(n / batch_size)`` batches are produced. The last batch is taken
    from the final ``batch_size`` samples, so it can overlap the batch
    before it when the sample count is not a multiple of ``batch_size``.
    """
    n_sample = len(feat_path)
    n_batches = math.ceil(n_sample / batch_size)
    for batch_idx in range(n_batches):
        if batch_idx == n_batches - 1:
            window = slice(-batch_size, None)
        else:
            begin = batch_idx * batch_size
            window = slice(begin, begin + batch_size)
        label = one_hot_encode(labels[window])
        feat = get_specgrams(feat_path[window])
        yield (feat, label)
def get_data(feat_path, labels):
    """Load all clips in *feat_path* and one-hot encode *labels*.

    Returns:
        ``(features, one_hot_labels)``.
    """
    features = get_specgrams(feat_path)
    encoded_labels = one_hot_encode(labels)
    return features, encoded_labels
def __main__():
    """Load the labelled file list, split it 80/20 and train the model."""
    print('## Load data and shuffle')
    feat_path, labels = prepare_data('data_labelised2.txt')
    idx = int(len(labels)*0.8)
    print("## Create Model")
    # Train on [0, idx) and test on [idx, end). The test slice used to start
    # at idx+1, which silently dropped sample `idx` from both sets.
    model(feat_path[:idx], labels[:idx], feat_path[idx:], labels[idx:])


with tf.device('/gpu:0'):
    __main__()

(De-)Convutional lstm autoencoder - error jumps

I'm trying to build a convolutional lstm autoencoder (that also predicts future and past) with Tensorflow, and it works to a certain degree, but the error sometimes jumps back up, so essentially, it never converges.
The model is as follows:
The encoder starts with a 64x64 frame from a 20-frame bouncing-MNIST video for each time step of the LSTM. Every stacked LSTM layer halves the resolution and increases the depth via 2x2 convolutions with a stride of 2 (so --> 32x32x3 --> ... --> 1x1x96).
In addition, each LSTM performs 3x3 convolutions with a stride of 1 on its state. Both results are concatenated to form the new state. In the same way, the decoder uses transposed convolutions to go back to the original format. Then the squared error is calculated.
The error starts at around 2700 and it takes around 20 hours (GeForce 1060) to get down to ~1700. At that point the jumps back up (sometimes to 2300, or even to ridiculous values like 440300) happen often enough that I can't really get any lower. Also, at that point the network can usually pinpoint where the digit should be, but the output is too fuzzy to actually make out the digit...
I tried different learning rates and optimizers, so if anybody knows why that jumping happens, that'd make me happy :)
Here is a graph of the loss with epochs:
import tensorflow as tf
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
#based on code by loliverhennigh (Github)
class ConvCell(tf.contrib.rnn.RNNCell):
    """Convolutional LSTM cell; with ``transpose=True`` the input path is a
    transposed convolution (deconvolution) instead of a convolution."""

    count = 0 #exists only to remove issues with variable scope

    def __init__(self, shape, num_features, transpose = False):
        """
        Args:
            shape: state shape of this cell as a 4-element list.
            num_features: number of filters of the h-path and of the x-path.
            transpose: use a transposed convolution on the input.
        """
        self.shape = shape
        self.num_features = num_features
        self._state_is_tuple = True
        self._transpose = transpose
        # Each instance gets its own number, used for a unique variable scope.
        ConvCell.count+=1
        self.count = ConvCell.count

    # `state_size` and `output_size` are declared as properties: the
    # decorators had been reduced to `#property` comments, which left both
    # as plain methods.
    @property
    def state_size(self):
        return (tf.contrib.rnn.LSTMStateTuple(self.shape[0:4],self.shape[0:4]))

    @property
    def output_size(self):
        return tf.TensorShape(self.shape[1:4])

    #here comes to the actual conv lstm implementation, if transpose = true, it performs a deconvolution on the input
    def __call__(self, inputs, state, scope=None):
        """Run one time step; returns ``(new_h, LSTMStateTuple(new_c, new_h))``."""
        with tf.variable_scope(scope or type(self).__name__+str(self.count)):
            c, h = state
            state_shape = h.shape
            input_shape = inputs.shape

            #filter variables and convolutions on data coming from the same cell, a time step previous
            h_filters = tf.get_variable("h_filters",[3,3,state_shape[3],self.num_features])
            h_filters_gates = tf.get_variable("h_filters_gates",[3,3,state_shape[3],3])
            h_partial = tf.nn.conv2d(h,h_filters,[1,1,1,1],'SAME')
            h_partial_gates = tf.nn.conv2d(h,h_filters_gates,[1,1,1,1],'SAME')
            c_filters = tf.get_variable("c_filters",[3,3,state_shape[3],3])
            c_partial = tf.nn.conv2d(c,c_filters,[1,1,1,1],'SAME')

            #filters and convolutions/deconvolutions on data coming from the cell input
            if self._transpose:
                x_filters = tf.get_variable("x_filters",[2,2,self.num_features,input_shape[3]])
                x_filters_gates = tf.get_variable("x_filters_gates",[2,2,3,input_shape[3]])
                x_partial = tf.nn.conv2d_transpose(inputs,x_filters,[int(state_shape[0]),int(state_shape[1]),int(state_shape[2]),self.num_features],[1,2,2,1],'VALID')
                x_partial_gates = tf.nn.conv2d_transpose(inputs,x_filters_gates,[int(state_shape[0]),int(state_shape[1]),int(state_shape[2]),3],[1,2,2,1],'VALID')
            else:
                x_filters = tf.get_variable("x_filters",[2,2,input_shape[3],self.num_features])
                x_filters_gates = tf.get_variable("x_filters_gates",[2,2,input_shape[3],3])
                x_partial = tf.nn.conv2d(inputs,x_filters,[1,2,2,1],'VALID')
                x_partial_gates = tf.nn.conv2d(inputs,x_filters_gates,[1,2,2,1],'VALID')

            #some more lstm gate business
            gate_bias = tf.get_variable("gate_bias",[1,1,1,3])
            h_bias = tf.get_variable("h_bias",[1,1,1,self.num_features*2])
            gates = h_partial_gates + x_partial_gates + c_partial + gate_bias
            i,f,o = tf.split(gates,3,axis=3)

            #concatenate the units coming from the spatial and the temporal dimension to build a unified state
            concat = tf.concat([h_partial,x_partial],3) + h_bias
            # NOTE(review): the candidate uses relu, which is unbounded, so the
            # cell state can grow without limit - possibly related to the
            # reported loss spikes; confirm.
            new_c = tf.nn.relu(concat)*tf.sigmoid(i)+c*tf.sigmoid(f)
            new_h = new_c * tf.sigmoid(o)
            new_state = tf.contrib.rnn.LSTMStateTuple(new_c,new_h)
            return new_h, new_state #its redundant, but thats how tensorflow likes it, apparently
#global variables
LEARNING_RATE = 0.005
ITERATIONS_PER_EPOCH = 80
BATCH_SIZE = 75
TEST = False #manual switch to go from training to testing
# Set to a float (e.g. 5.0) to clip the global gradient norm, the usual remedy
# for sudden loss spikes. None keeps the original, unclipped update.
GRAD_CLIP_NORM = None
if TEST:
    BATCH_SIZE = 1

inputs = tf.placeholder(tf.float32, (20, BATCH_SIZE, 64, 64,1))
shape0 = [BATCH_SIZE,64,64,2]
shape1 = [BATCH_SIZE,32,32,6]
shape2 = [BATCH_SIZE,16,16,12]
shape3 = [BATCH_SIZE,8,8,24]
shape4 = [BATCH_SIZE,4,4,48]
shape5 = [BATCH_SIZE,2,2,96]
shape6 = [BATCH_SIZE,1,1,192]


def _zero_states(shapes):
    """Return a tuple with one all-zero LSTMStateTuple per shape in *shapes*."""
    return tuple(tf.contrib.rnn.LSTMStateTuple(tf.zeros(s), tf.zeros(s)) for s in shapes)


#apparently tf.multirnncell has very specific requirements for the initial states oO
initial_state1 = _zero_states([shape1, shape2, shape3, shape4, shape5, shape6])
initial_state2 = _zero_states([shape5, shape4, shape3, shape2, shape1, shape0])

#encoding part of the autoencoder graph
# (cells are created in the original order: ConvCell numbers its variable
# scopes by construction order)
cell1 = ConvCell(shape1,3)
cell2 = ConvCell(shape2,6)
cell3 = ConvCell(shape3,12)
cell4 = ConvCell(shape4,24)
cell5 = ConvCell(shape5,48)
cell6 = ConvCell(shape6,96)
mcell = tf.contrib.rnn.MultiRNNCell([cell1,cell2,cell3,cell4,cell5,cell6])
rnn_outputs, rnn_states = tf.nn.dynamic_rnn(mcell, inputs[0:20,:,:,:],initial_state=initial_state1,dtype=tf.float32, time_major=True)

#decoding part of the autoencoder graph, forward block and backwards block
cell9a = ConvCell(shape5,48,transpose = True)
cell10a = ConvCell(shape4,24,transpose = True)
cell11a = ConvCell(shape3,12,transpose = True)
cell12a = ConvCell(shape2,6,transpose = True)
cell13a = ConvCell(shape1,3,transpose = True)
cell14a = ConvCell(shape0,1,transpose = True)
mcella = tf.contrib.rnn.MultiRNNCell([cell9a,cell10a,cell11a,cell12a,cell13a,cell14a])
cell9b = ConvCell(shape5,48,transpose = True)
cell10b = ConvCell(shape4,24,transpose = True)
cell11b= ConvCell(shape3,12,transpose = True)
cell12b = ConvCell(shape2,6,transpose = True)
cell13b = ConvCell(shape1,3,transpose = True)
cell14b = ConvCell(shape0,1,transpose = True)
mcellb = tf.contrib.rnn.MultiRNNCell([cell9b,cell10b,cell11b,cell12b,cell13b,cell14b])


def PredictionLayer(rnn_outputs,viewPoint = 11, reverse = False):
    """Decode from the encoder output at time step *viewPoint*.

    With ``reverse=False`` the forward decoder (``mcella``) is used and the
    target is ``inputs[viewPoint-1:20]``; with ``reverse=True`` the backward
    decoder (``mcellb``) is used and the target is ``inputs[viewPoint-2::-1]``.

    Returns:
        The decoded frames (channel mean) when ``TEST`` is set, otherwise
        the summed squared error against the target frames.
    """
    predLength = viewPoint-2 if reverse else 20-viewPoint
    #vision is the input for the decoder: the encoder output at viewPoint followed by zeros
    vision = tf.concat([rnn_outputs[viewPoint-1:viewPoint,:,:,:],tf.zeros([predLength,BATCH_SIZE,1,1,192])],0)
    decoder = mcellb if reverse else mcella
    rnn_outputs2, rnn_states = tf.nn.dynamic_rnn(decoder, vision, initial_state = initial_state2, time_major=True)
    mean = tf.reduce_mean(rnn_outputs2,4)
    if TEST:
        return mean
    if reverse:
        return tf.reduce_sum(tf.square(mean-inputs[viewPoint-2::-1,:,:,:,0]))
    return tf.reduce_sum(tf.square(mean-inputs[viewPoint-1:20,:,:,:,0]))


if TEST:
    # `createPredictionLayer` was never defined; the function is PredictionLayer.
    mean = tf.concat([PredictionLayer(rnn_outputs,11,True)[::-1,:,:,:],PredictionLayer(rnn_outputs,11)],0)
else: #training part of the graph
    error = tf.zeros([1])
    for i in range(8,15): #range size of 7 or less works, 9 or more does not, no idea why
        error += PredictionLayer(rnn_outputs, i)
        error += PredictionLayer(rnn_outputs, i, True)
    optimizer = tf.train.RMSPropOptimizer(learning_rate=LEARNING_RATE)
    if GRAD_CLIP_NORM is None:
        train_fn = optimizer.minimize(error)
    else:
        # Rescale all gradients together so their global norm never exceeds
        # GRAD_CLIP_NORM; a single huge gradient can then no longer throw the
        # parameters far away.
        gradients, variables = zip(*optimizer.compute_gradients(error))
        clipped_gradients, _ = tf.clip_by_global_norm(gradients, GRAD_CLIP_NORM)
        train_fn = optimizer.apply_gradients(zip(clipped_gradients, variables))
################################################################################
## TRAINING LOOP ##
################################################################################
#code based on siemanko/tf_lstm.py (Github)
# One directory for both saving and restoring. The save path used backslashes
# ('.\conv_lstm_multiples_v2\iteration'), which only names this directory on
# Windows.
CHECKPOINT_DIR = './conv_lstm_multiples_v2/'

gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
saver = tf.train.Saver(restore_sequentially=True, allow_empty=True,)
session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
session.run(tf.global_variables_initializer())
vids = np.load("mnist_test_seq.npy") #20/10000/64/64 , moving mnist dataset from http://www.cs.toronto.edu/~nitish/unsupervised_video/
vids = vids[:,0:6000,:,:] #training set

# Resume from the newest checkpoint if there is one; on the very first run
# there is none and training starts from the fresh initialisation.
latest_checkpoint = tf.train.latest_checkpoint(CHECKPOINT_DIR)
if latest_checkpoint is not None:
    saver.restore(session, latest_checkpoint)

for epoch in range(1000):
    if TEST:
        break
    epoch_error = 0
    #randomize batches each epoch (shuffle along the video axis)
    vids = np.swapaxes(vids,0,1)
    np.random.shuffle(vids)
    vids = np.swapaxes(vids,0,1)
    for i in range(ITERATIONS_PER_EPOCH):
        #running the graph and feeding data
        err,_ = session.run([error, train_fn], {inputs: np.expand_dims(vids[:,i*BATCH_SIZE:(i+1)*BATCH_SIZE,:,:],axis=4)})
        print(err)
        epoch_error += err
    #training error each epoch and regular saving
    epoch_error /= (ITERATIONS_PER_EPOCH*BATCH_SIZE*4096*20*7)
    if (epoch+1) % 5 == 0:
        saver.save(session, os.path.join(CHECKPOINT_DIR, 'iteration'), global_step=epoch)
        print("saved")
    print("Epoch %d, train error: %f" % (epoch, epoch_error))

#testing
# `mean` is only part of the graph when TEST is set, so the visualisation is
# skipped after a training run instead of failing on an undefined name.
if TEST:
    plt.ion()
    f, axarr = plt.subplots(2)
    vids = np.load("mnist_test_seq.npy")
    for i in range(6000,10000):
        img = session.run([mean], {inputs: np.expand_dims(vids[:,i:i+1,:,:],axis=4)})
        for j in range(20):
            axarr[0].imshow(img[0][j,0,:,:])
            axarr[1].imshow(vids[j,i,:,:])
            plt.show()
            plt.pause(0.1)
Usually this happens when the magnitude of the gradients is very high at some point, which causes the network parameters to change a lot in a single step. To verify that this is indeed the case, plot the gradient magnitudes over time in the same way and check whether they spike right before the loss jumps. Assuming this is the case, the classic approach is to use gradient clipping (or to go all the way to natural gradient).