Transfer learning using VGG in PyTorch - NumPy

I am using vgg16 for image classification. I want to test my transferred model with the following code:
classes = ['A', 'B', 'C']
len(classes)    # 3
len(test_data)  # 171
batch_size = 10

# Testing
test_loss = 0.0
class_correct = list(0. for i in range(len(classes)))
class_total = list(0. for i in range(len(classes)))

vgg16.eval()
for data, target in test_loader:
    output = vgg16(data)
    loss = criterion(output, target)
    test_loss += loss.item() * data.size(0)
    _, pred = torch.max(output, 1)
    correct_tensor = pred.eq(target.data.view_as(pred))
    correct = np.squeeze(correct_tensor.numpy())
    for i in range(batch_size):
        label = target.data[i]
        class_correct[label] += correct[i].item()
        class_total[label] += 1

test_loss = test_loss / len(test_loader.dataset)
print('Test Loss: {:.6f}\n'.format(test_loss))

for i in range(len(classes)):
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            classes[i], 100 * class_correct[i] / class_total[i],
            np.sum(class_correct[i]), np.sum(class_total[i])))
    else:
        print('Test Accuracy of %5s: N/A (no training examples)' % (classes[i]))

print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
    100. * np.sum(class_correct) / np.sum(class_total),
    np.sum(class_correct), np.sum(class_total)))
I receive the following error:
15 for i in range(batch_size):
16 label = target.data[i]
---> 17 class_correct[label] += correct[i].item()
18 class_total[label] += 1
19
IndexError: too many indices for array
I do not know why I am getting this error and how I can solve it. I would be grateful if you could help me.
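For what it's worth, 171 test samples with batch_size=10 means the final batch holds a single sample (171 = 17 × 10 + 1); np.squeeze then turns correct into a 0-d array, so correct[i] raises exactly this IndexError, and range(batch_size) can also overrun the shorter batch. Below is a minimal sketch of an inner loop that avoids both issues, assuming vgg16, criterion, test_loader, class_correct and class_total are set up as in the snippet above:

for data, target in test_loader:
    output = vgg16(data)
    loss = criterion(output, target)
    test_loss += loss.item() * data.size(0)
    _, pred = torch.max(output, 1)
    correct_tensor = pred.eq(target.data.view_as(pred))
    # keep at least one dimension so a batch of size 1 still indexes correctly
    correct = correct_tensor.numpy().reshape(-1)
    for i in range(data.size(0)):   # actual batch size, not the nominal batch_size
        label = target.data[i]
        class_correct[label] += correct[i].item()
        class_total[label] += 1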

Related

How to implement a moving max (and min) calculation in a customized tf2.keras layer

During the training procedure, I want to calculate the moving maximum (and minimum) values over a batch of feature maps, and then implement a quantization algorithm based on the moving max (or min) values. For example: moving_max = (1 - momentum) x (previous moving_max) + momentum x (current max value of a batch).
I implemented the following code as a customized tf2.keras layer:
import tensorflow as tf
from tensorflow.keras.layers import Layer

class QATQuantizerLayer(Layer):
    def __init__(self, num_bits, momentum=0.01, **kwargs):
        super(QATQuantizerLayer, self).__init__(**kwargs)
        self.num_bits = num_bits
        self.momentum = momentum
        self.num_flag = 0
        self.quant_min_val = 0
        self.quant_max_val = (1 << self.num_bits) - 1
        self.quant_range = float(self.quant_max_val - self.quant_min_val)

    def build(self, input_shape):
        self.moving_min = self.add_weight("moving_min", shape=(1,), initializer=tf.constant_initializer(-6), trainable=False)
        self.moving_max = self.add_weight("moving_max", shape=(1,), initializer=tf.constant_initializer(6), trainable=False)
        return super(QATQuantizerLayer, self).build(input_shape)

    def call(self, inputs, training, **kwargs):
        if training is None:
            training = False
        if training == True:
            batch_min = tf.reduce_min(inputs)
            batch_max = tf.reduce_max(inputs)
            if self.num_flag == 0:
                self.num_flag += 1
                self.moving_min = batch_min
                self.moving_max = batch_max
            else:
                temp_min = (1 - self.momentum) * self.moving_min + self.momentum * batch_min
                temp_max = (1 - self.momentum) * self.moving_max + self.momentum * batch_max
                self.moving_min = temp_min
                self.moving_max = temp_max
        float_range = self.moving_max - self.moving_min
        scale = float_range / self.quant_range
        scale = tf.maximum(scale, tf.keras.backend.epsilon())
        zero_point = tf.math.round(self.moving_min / scale)
        output = (tf.clip_by_value(_round_imp(inputs / scale) - zero_point,
                                   self.quant_min_val, self.quant_max_val) + zero_point) * scale
        return output
However, when I start training I get the following error:
TypeError: An op outside of the function building code is being passed a "Graph" tensor. It is possible to have Graph tensors leak out of the function building context by including a tf.init_scope in your function building code. For example, the following function will fail:......
If I change the statement [temp_min = (1 - self.momentum) * self.moving_min + self.momentum * batch_min] to [temp_min = (1 - self.momentum) + self.momentum * batch_min], the error disappears. (That is, removing self.moving_min from the statement makes it go away.)
How can I solve this problem?
Thank you very much.
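One observation, offered only as an assumption about the cause: call() rebinds self.moving_min / self.moving_max (the tf.Variables created in build()) to new graph tensors, which is one common way "Graph tensors" leak out of the function-building context; updating the variables in place with assign() avoids that. A minimal, stripped-down sketch (hypothetical layer name, quantization math omitted, not the asker's confirmed fix):

import tensorflow as tf
from tensorflow.keras.layers import Layer

class MovingMinMaxLayer(Layer):  # hypothetical name, for illustration only
    def __init__(self, momentum=0.01, **kwargs):
        super().__init__(**kwargs)
        self.momentum = momentum

    def build(self, input_shape):
        self.moving_min = self.add_weight("moving_min", shape=(1,), initializer=tf.constant_initializer(-6), trainable=False)
        self.moving_max = self.add_weight("moving_max", shape=(1,), initializer=tf.constant_initializer(6), trainable=False)
        return super().build(input_shape)

    def call(self, inputs, training=None):
        if training:
            batch_min = tf.reduce_min(inputs)
            batch_max = tf.reduce_max(inputs)
            # update the variables in place instead of rebinding the attributes
            self.moving_min.assign((1 - self.momentum) * self.moving_min + self.momentum * batch_min)
            self.moving_max.assign((1 - self.momentum) * self.moving_max + self.momentum * batch_max)
        return inputs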

I want to train a set of weights using PyTorch, but the weights do not change

I want to reproduce a method from a paper. The code in that paper was written in TensorFlow 1.0 and I want to rewrite it in PyTorch. In brief, I want to learn a set of weights G that can be used to reweight the input data, but during training G does not change at all. This is the TensorFlow code:
n, p = X_input.shape
n_e, p_e = X_encoder_input.shape
display_step = 100

X = tf.placeholder("float", [None, p])
X_encoder = tf.placeholder("float", [None, p_e])
G = tf.Variable(tf.ones([n, 1]))

loss_balancing = tf.constant(0, tf.float32)
for j in range(1, p + 1):
    X_j = tf.slice(X_encoder, [j * n, 0], [n, p_e])
    I = tf.slice(X, [0, j - 1], [n, 1])
    balancing_j = tf.divide(tf.matmul(tf.transpose(X_j), G * G * I), tf.maximum(tf.reduce_sum(G * G * I), tf.constant(0.1))) - tf.divide(tf.matmul(tf.transpose(X_j), G * G * (1 - I)), tf.maximum(tf.reduce_sum(G * G * (1 - I)), tf.constant(0.1)))
    loss_balancing += tf.norm(balancing_j, ord=2)

loss_regulizer = (tf.reduce_sum(G * G) - n) ** 2 + 10 * (tf.reduce_sum(G * G - 1)) ** 2
loss = loss_balancing + 0.0001 * loss_regulizer

optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(loss)

saver = tf.train.Saver()
sess = tf.Session()
sess.run(tf.global_variables_initializer())
and this is my rewritten PyTorch code:
n, p = x_test.shape
loss_balancing = torch.tensor(0.0)
G = nn.Parameter(torch.ones([n, 1]))
optimizer = torch.optim.RMSprop([G], lr=0.001)

for i in range(num_steps):
    for j in range(1, p + 1):
        x_j = x_all_encoder[j * n : j * n + n, :]
        I = x_test[0:n, j - 1:j]
        balancing_j = torch.divide(torch.matmul(torch.transpose(x_j, 0, 1), G * G * I),
                                   torch.maximum((G * G * I).sum(),
                                                 torch.tensor(0.1) -
                                                 torch.divide(torch.matmul(torch.transpose(x_j, 0, 1), G * G * (1 - I)),
                                                              torch.maximum((G * G * (1 - I)).sum(), torch.tensor(0.1))
                                                              )
                                                 )
                                   )
        loss_balancing += nn.Parameter(torch.norm(balancing_j))
    loss_regulizer = nn.Parameter(((G * G) - n).sum() ** 2 + 10 * ((G * G - 1).sum()) ** 2)
    loss = nn.Parameter(loss_balancing + 0.0001 * loss_regulizer)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if i % 100 == 0:
        print('Loss:{:.4f}'.format(loss.item()))
and G.grad is None. I want to know how to make G converge, by iteration, to a set of values that minimizes the loss. Thanks.
Firstly, please provide a minimal reproducible example; it will make it much easier for people to answer your question.
Since G.grad has no value, it indicates that loss.backward() didn't work properly.
The gradient computation can be disrupted by many factors, but in this case I suspect the maximum operation in your code blocks the backward flow, since the maximum operation is not differentiable in general.
To check whether this hypothesis is correct, you could inspect the gradient of a tensor created after the maximum operation, which I cannot do myself because the provided code is not executable on my side.
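For illustration, a minimal self-contained sketch of that kind of check (not the asker's actual code): retain_grad() keeps the gradient of a non-leaf tensor so it can be inspected after backward().

import torch

G = torch.nn.Parameter(torch.ones(5, 1))
x = torch.randn(5, 1)
# a tensor created right after the maximum operation
inter = torch.maximum((G * G * x).sum(), torch.tensor(0.1))
inter.retain_grad()   # keep the gradient of this non-leaf tensor for inspection
loss = inter ** 2
loss.backward()
print(inter.grad)     # gradient right after the maximum
print(G.grad)         # gradient that actually reaches the parameter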

Resource exhausted: OOM with model.fit in a for-loop grid search cross-validation

I am trying to do a grid search by calling model.fit repeatedly for different parameters of my model.
I get a resource exhausted error in TensorFlow, in spite of doing del model and tf.keras.backend.clear_session() at the end of the loop. This is my code:
def kfoldsplit(FRAME_PATH, MASK_PATH, k):
    kfold = []
    all_frames = os.listdir(FRAME_PATH)
    all_masks = os.listdir(MASK_PATH)
    all_frames.sort(key=lambda var: [int(x) if x.isdigit() else x
                                     for x in re.findall(r'[^0-9]|[0-9]+', var)])
    all_masks.sort(key=lambda var: [int(x) if x.isdigit() else x
                                    for x in re.findall(r'[^0-9]|[0-9]+', var)])
    random.seed(230)
    random.shuffle(all_frames)
    # Generate train, val, and test sets for frames
    train_split = int(0.8 * len(all_frames))
    #val_split = int(0.9 * len(all_frames))
    #test_split = int(0.9 * len(all_frames))
    train_frames = all_frames[:train_split]
    #val_frames = all_frames[train_split:val_split]
    test_frames = all_frames[train_split:]
    # Generate corresponding mask lists for masks
    train_masks = [f for f in all_masks if 'image_' + f[6:16] + 'dcm' in train_frames]
    #val_masks = [f for f in all_masks if 'image_' + f[6:16] + 'dcm' in val_frames]
    test_masks = [f for f in all_masks if 'image_' + f[6:16] + 'dcm' in test_frames]
    size_of_subset = int(len(train_masks) / k)
    for i in range(0, k):
        subset = (train_frames[i * size_of_subset:(i + 1) * size_of_subset],
                  train_masks[i * size_of_subset:(i + 1) * size_of_subset])
        kfold.append(subset)
    return kfold, (test_frames, test_masks)


def get_model_name(k):
    return 'model_' + str(k) + '.hdf5'


def float_range(start, stop, step):
    while start < stop:
        yield float(start)
        start += decimal.Decimal(step)


frames_path = 'C:/Datasets/elderlymen1/2d/images'
masks_path = 'C:/Datasets/elderlymen1/2d/FASCIA_FILLED'
kf = kfoldsplit(frames_path, masks_path, 10)


def crossvalidation(epoch, kf, loops):
    VALIDATION_ACCURACY = []
    VALIDATION_LOSS = []
    Params = []
    save_dir = 'C:/saved_models/'
    fold_var = 1
    i = 0
    for i in float_range(0, 1, 0.1):
        for j in float_range(1e-6, 1e-3, 1e-6):
            #while i <= loops:
            #_alpha = random.uniform(0, 1)
            #lrate = random.uniform(1e-3, 1e-6)
            _alpha = i
            lrate = j
            Params.append([_alpha, lrate])
            for subset in kf[0]:
                list_IDs = subset[0]
                train_data_generator = DataGenerator2(list_IDs, frames_path, masks_path, to_fit=True, batch_size=2,
                                                      dim=(512, 512), dimy=(512, 512), n_channels=1, n_classes=2, shuffle=True,
                                                      data_gen_args=data_gen_args_dict)
                list_IDs = kf[1][0]
                valid_data_generator = DataGenerator(list_IDs, frames_path, masks_path, to_fit=True, batch_size=2,
                                                     dim=(512, 512), dimy=(512, 512), n_channels=1, n_classes=2, shuffle=True)
                # CREATE NEW MODEL
                model = unet(pretrained_weights='csa/unet_ThighOuterSurface.hdf5')
                # COMPILE NEW MODEL
                model.compile(optimizer=Adam(lr=lrate), loss=combo_loss(alpha=_alpha, beta=0.4), metrics=[dice_accuracy])
                # CREATE CALLBACKS
                checkpoint = tf.keras.callbacks.ModelCheckpoint(save_dir + get_model_name(fold_var),
                                                                monitor='val_loss', verbose=1,
                                                                save_best_only=True, mode='max')
                callbacks_list = [checkpoint]
                # There can be other callbacks, but just showing one because it involves the model name
                # This saves the best model
                # FIT THE MODEL
                history = model.fit(train_data_generator, validation_steps=len(valid_data_generator),
                                    steps_per_epoch=len(train_data_generator),
                                    epochs=epoch,
                                    callbacks=callbacks_list,
                                    validation_data=valid_data_generator)
                # PLOT HISTORY
                # :
                # :
                # LOAD BEST MODEL to evaluate the performance of the model
                model.load_weights("C:/saved_models/model_" + str(fold_var) + ".hdf5")
                results = model.evaluate(valid_data_generator)
                results = dict(zip(model.metrics_names, results))
                VALIDATION_ACCURACY.append(results['dice_accuracy'])
                VALIDATION_LOSS.append(results['loss'])
                tf.keras.backend.clear_session()
                fold_var += 1
                del model
            #i+=1
    print(VALIDATION_ACCURACY)
    print(Params)
    sample = open('metrics.txt', '+r')
    print(VALIDATION_ACCURACY, file=sample)
    print(Params, file=sample)
    print('...', file=sample)
    sample.close()


crossvalidation(15, kf, 2)
Why is the memory still exhausted, and how can I release it? Or, if that is not possible, is there another option for grid search and cross-validation of an image segmentation model?
Thank you.
After trying everything I found in order to release memory, the only thing that solved the problem was adding

del model
gc.collect()

at the end of the for loop.
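For reference, a minimal sketch of how the end of such a loop could look with that change; the stand-in model here is only illustrative (not the unet from the question), and it assumes import gc at the top of the script.

import gc
import tensorflow as tf

for fold in range(10):
    # stand-in for building and training the real model
    model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
    model.compile(optimizer='adam', loss='mse')
    # ... model.fit / model.evaluate would go here ...
    tf.keras.backend.clear_session()  # drop the graph state Keras keeps around
    del model                         # remove the Python reference to the model
    gc.collect()                      # force collection so the memory is actually released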

Stratify batch in Tensorflow 2

I have minibatches that I get from an SQLite database, with data of integer and float type, x, and a binary label in 0 and 1, y. I am looking for something like X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(x, y, test_size=0.1, random_state=1, stratify=y) from scikit-learn, where a keyword can stratify the data (i.e. keep the same number of class-0 and class-1 instances).
In TensorFlow 2, stratification does not seem to be straightforward. My very complicated solution works for me, but it takes a lot of time because of all the reshaping and transposing:
def stratify(x, y):
    # number of positive instances (the smaller class)
    pos = np.sum(y).item()  # how many positive bonds there are
    x = np.transpose(x)
    # number of features
    f = np.shape(x)[1]
    # filter only class 1
    y = tf.transpose(y)
    x_pos = tf.boolean_mask(x, y)
    y_pos = tf.boolean_mask(y, y)
    # filter only class 0
    x_neg = tf.boolean_mask(x, tf.bitwise.invert(y) - 254)
    x_neg = tf.reshape(x_neg, [f, -1])
    y_neg = tf.boolean_mask(y, tf.bitwise.invert(y) - 254)
    # just take randomly as many class-0 as there are class-1
    x_neg = tf.transpose(tf.random.shuffle(tf.transpose(x_neg)))
    x_neg = x_neg[:, 0:pos]
    y_neg = y_neg[0:pos]
    # concat the class-1 and class-0 together, then shuffle, and concat back together
    x = tf.concat([x_pos, tf.transpose(x_neg)], 0)
    y = tf.concat([y_pos, tf.transpose(y_neg)], 0)
    xy = tf.concat([tf.transpose(x), tf.cast(np.reshape(y, [1, -1]), tf.float64)], 0)
    xy = tf.transpose((tf.random.shuffle(tf.transpose(xy))))  # because there is no axis arg in shuffle
    x = xy[0:f, :]
    x = tf.transpose(x)
    y = xy[f, :]
    return x, y
I would be happy to get feedback on or improvements to my own function, or novel, easier ideas.
Data splitting is best done on the raw data, before you transform it into tensors. If there is a strong requirement to do it in TensorFlow only, then I would suggest making use of the tf.data.Dataset class. I have added demo code with relevant comments explaining the steps.
import tensorflow as tf
import numpy as np
TEST_SIZE = 0.1
DATA_SIZE = 1000
# Create data
X_data = np.random.rand(DATA_SIZE, 28, 28, 1)
y_data = np.random.randint(0, 2, [DATA_SIZE])
samples1 = np.sum(y_data)
print('Percentage of 1 = ', samples1 / len(y_data))
# Create TensorFlow dataset
dataset = tf.data.Dataset.from_tensor_slices((X_data, y_data))
# Gather data with 0 and 1 labels separately
class0_dataset = dataset.filter(lambda x, y: y == 0)
class1_dataset = dataset.filter(lambda x, y: y == 1)
# Shuffle them
class0_dataset = class0_dataset.shuffle(DATA_SIZE)
class1_dataset = class1_dataset.shuffle(DATA_SIZE)
# Split them
class0_test_samples_len = int((DATA_SIZE - samples1) * TEST_SIZE)
class0_test = class0_dataset.take(class0_test_samples_len)
class0_train = class0_dataset.skip(class0_test_samples_len)
class1_test_samples_len = int(samples1 * TEST_SIZE)
class1_test = class1_dataset.take(class1_test_samples_len)
class1_train = class1_dataset.skip(class1_test_samples_len)
print('Train Class 0 = ', len(list(class0_train)), ' Class 1 = ', len(list(class1_train)))
print('Test Class 0 = ', len(list(class0_test)), ' Class 1 = ', len(list(class1_test)))
# Gather datasets
train_dataset = class0_train.concatenate(class1_train).shuffle(DATA_SIZE)
test_dataset = class0_test.concatenate(class1_test).shuffle(DATA_SIZE)
print('Train dataset size = ', len(list(train_dataset)))
print('Test dataset size = ', len(list(test_dataset)))
Sample output:
Percentage of 1 = 0.474
Train Class 0 = 474 Class 1 = 427
Test Class 0 = 52 Class 1 = 47
Train dataset size = 901
Test dataset size = 99

TensorFlow: training on mini-batches, fast then slow

I am a beginner in TensorFlow and I am trying to train a model using mini-batches. To do that I created a generator and iterate over it. The problem I encounter is that, at the beginning of an epoch, training seems fast (many batches per second), but then it slows down (to about one batch per second), so I am wondering where I went wrong in my code, but I cannot find the problem.
def prepare_data(filename):
    '''load the file which gives the path and label for the data'''
    f = open(filename, 'r')
    data = [line.split() for line in f]
    feat = []
    label = []
    for l in data:
        feat.append(l[0])
        label.append(l[1])
    n_samples = len(feat)
    shuf = list(range(n_samples))
    random.shuffle(shuf)
    count = Counter(label)
    print(count)
    feature = [feat[i] for i in shuf]
    label = np.array(label, dtype=np.int)
    return feature, label[shuf]


def get_specgrams(paths, nsamples=16000):
    '''
    Given a list of paths, return specgrams.
    '''
    # read the wav files
    wavs = [wavfile.read(x)[1] for x in paths]
    # zero pad the shorter samples and cut off the long ones.
    data = []
    for wav in wavs:
        if wav.size < 16000:
            d = np.pad(wav, (nsamples - wav.size, 0), mode='constant')
        else:
            d = wav[0:nsamples]
        data.append(d)
    # get the specgram
    #specgram = [signal.spectrogram(d, nperseg=256, noverlap=128)[2] for d in data]
    #specgram = [s.reshape(129, 124, -1) for s in specgram]
    return np.asarray(data)


def get_specgram(path, nsamples=16000):
    '''
    Given a path, return a specgram.
    '''
    # read the wav file
    wav = wavfile.read(path)[1]
    # zero pad the shorter samples and cut off the long ones.
    if wav.size < 16000:
        d = np.pad(wav, (nsamples - wav.size, 0), mode='constant')
    else:
        d = wav[0:nsamples]
    # get the specgram
    #specgram = [signal.spectrogram(d, nperseg=256, noverlap=128)[2] for d in data]
    #specgram = [s.reshape(129, 124, -1) for s in specgram]
    return d


# multi-class classification, binary labels
def one_hot_encode(labels, n_unique_labels=31):
    n_labels = len(labels)
    #print('number of unique labels:', n_unique_labels)
    one_hot_encode = np.zeros((n_labels, n_unique_labels))
    one_hot_encode[np.arange(n_labels), labels] = 1
    return np.array(one_hot_encode, dtype=np.int)


#create_path_file('train/audio/')

def model(tr_features, tr_labels, ts_features, ts_labels):
    # remove gpu device error
    config = tf.ConfigProto(allow_soft_placement=True)

    # parameters
    BATCH_SIZE = 4
    number_loop = math.ceil(len(tr_features) / BATCH_SIZE)
    training_epochs = 10
    n_dim = 16000
    n_classes = 31  # len(np.unique(ts_labels))
    n_hidden_units_one = 280
    n_hidden_units_two = 300
    sd = 1 / np.sqrt(n_dim)
    learning_rate = 0.1

    # get test data
    ts_features, ts_labels = get_data(ts_features, ts_labels)

    # Model
    X = tf.placeholder(tf.float32, [None, n_dim])
    Y = tf.placeholder(tf.float32, [None, n_classes])

    W_1 = tf.Variable(tf.random_normal([n_dim, n_hidden_units_one], mean=0, stddev=sd))
    b_1 = tf.Variable(tf.random_normal([n_hidden_units_one], mean=0, stddev=sd))
    h_1 = tf.nn.tanh(tf.matmul(X, W_1) + b_1)

    W_2 = tf.Variable(tf.random_normal([n_hidden_units_one, n_hidden_units_two], mean=0, stddev=sd))
    b_2 = tf.Variable(tf.random_normal([n_hidden_units_two], mean=0, stddev=sd))
    h_2 = tf.nn.sigmoid(tf.matmul(h_1, W_2) + b_2)

    W = tf.Variable(tf.random_normal([n_hidden_units_two, n_classes], mean=0, stddev=sd))
    b = tf.Variable(tf.random_normal([n_classes], mean=0, stddev=sd))
    y_ = tf.nn.softmax(tf.matmul(h_2, W) + b)

    init = tf.initialize_all_variables()

    # loss function and optimizer
    cost_function = -tf.reduce_sum(Y * tf.log(y_))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function)
    correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # train loop
    cost_history = np.empty(shape=[1], dtype=float)
    y_true, y_pred = None, None
    with tf.Session(config=config) as sess:
        sess.run(init)
        for epoch in range(training_epochs):
            print(' ## Epoch n°', epoch + 1)
            batch = batch_generator(BATCH_SIZE, tr_features, tr_labels)
            acc_total = 0.0
            for cpt, (train_features_batch, train_labels_batch) in enumerate(batch):
                _, cost = sess.run([optimizer, cost_function], feed_dict={X: train_features_batch, Y: train_labels_batch})
                cost_history = np.append(cost_history, cost)
                correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(Y, 1))
                accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
                acc = accuracy.eval(feed_dict={X: train_features_batch, Y: train_labels_batch})
                acc_total = (acc_total * cpt + acc) / (cpt + 1)
                print('Train accuracy : ', acc_total, '[', str(cpt + 1), '/', str(number_loop), ']', flush=True, end='\r')
            clear_output()
            print('Train accuracy : ', acc_total)
            y_pred = sess.run(tf.argmax(y_, 1), feed_dict={X: ts_features})
            y_true = sess.run(tf.argmax(ts_labels, 1))
            print('Test accuracy: ', round(sess.run(accuracy, feed_dict={X: ts_features, Y: ts_labels}), 3))

    fig = plt.figure(figsize=(10, 8))
    plt.plot(cost_history)
    plt.axis([0, training_epochs, 0, np.max(cost_history)])
    plt.show()

    p, r, f, s = precision_recall_fscore_support(y_true, y_pred, average='micro')
    print("F-Score:", round(f, 3))


def batch_generator(batch_size, feat_path, labels):
    n_sample = len(feat_path)
    ite = math.ceil(n_sample / batch_size)
    for i in range(0, ite):
        if i == ite - 1:
            label = one_hot_encode(labels[-batch_size:])
            feat = get_specgrams(feat_path[-batch_size:])
            yield (feat, label)
        else:
            label = one_hot_encode(labels[i * batch_size:i * batch_size + batch_size])
            feat = get_specgrams(feat_path[i * batch_size:i * batch_size + batch_size])
            yield (feat, label)


def get_data(feat_path, labels):
    feat = get_specgrams(feat_path)
    label = one_hot_encode(labels)
    return feat, label


def __main__():
    print('## Load data and shuffle')
    feat_path, labels = prepare_data('data_labelised2.txt')
    idx = int(len(labels) * 0.8)
    print("## Create Model")
    model(feat_path[0:idx], labels[0:idx], feat_path[idx + 1:], labels[idx + 1:])


with tf.device('/gpu:0'):
    __main__()
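One detail worth flagging, purely as a hypothesis: correct_prediction and accuracy are rebuilt with tf.equal / tf.argmax / tf.reduce_mean inside the batch loop, and in graph-mode TensorFlow 1 every such call adds new nodes to the default graph, so the graph grows with every batch and each step gets slower. A sketch of the inner loop body that reuses the ops already defined before the session is shown below; it is a fragment meant to replace the loop body above and assumes the rest of the script (optimizer, cost_function, accuracy, X, Y, sess, and the generator) is unchanged.

# inside "for cpt, (train_features_batch, train_labels_batch) in enumerate(batch):"
# reuse the ops built once above instead of creating new graph ops per batch
_, cost, acc = sess.run([optimizer, cost_function, accuracy],
                        feed_dict={X: train_features_batch, Y: train_labels_batch})
cost_history = np.append(cost_history, cost)
acc_total = (acc_total * cpt + acc) / (cpt + 1)
print('Train accuracy : ', acc_total, '[', str(cpt + 1), '/', str(number_loop), ']', flush=True, end='\r')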