MNIST - Vanilla Neural Network - Why Cost Function is Increasing? - tensorflow

I've been combing through this code for a week now trying to figure out why my cost function is increasing as in the following image. Reducing the learning rate does help but very little. Can anyone spot why the cost function isn't working as expected?
I realise a CNN would be preferable, but I still want to understand why this simple network is failing.
Please help:)
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt
mnist = input_data.read_data_sets("MNIST_DATA/",one_hot=True)
def createPlaceholders():
    xph = tf.placeholder(tf.float32, (784, None))
    yph = tf.placeholder(tf.float32, (10, None))
    return xph, yph

def init_param(layers_dim):
    weights = {}
    L = len(layers_dim)
    for l in range(1, L):
        weights['W' + str(l)] = tf.get_variable('W' + str(l), shape=(layers_dim[l], layers_dim[l-1]), initializer=tf.contrib.layers.xavier_initializer())
        weights['b' + str(l)] = tf.get_variable('b' + str(l), shape=(layers_dim[l], 1), initializer=tf.zeros_initializer())
    return weights
def forward_prop(X, L, weights):
    parameters = {}
    parameters['A0'] = tf.cast(X, tf.float32)
    for l in range(1, L-1):
        parameters['Z' + str(l)] = tf.add(tf.matmul(weights['W' + str(l)], parameters['A' + str(l-1)]), weights['b' + str(l)])
        parameters['A' + str(l)] = tf.nn.relu(parameters['Z' + str(l)])
    parameters['Z' + str(L-1)] = tf.add(tf.matmul(weights['W' + str(L-1)], parameters['A' + str(L-2)]), weights['b' + str(L-1)])
    return parameters['Z' + str(L-1)]

def compute_cost(ZL, Y):
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tf.cast(Y, tf.float32), logits=ZL))
    return cost
def randomMiniBatches(X, Y, minibatch_size):
    m = X.shape[1]
    shuffle = np.random.permutation(m)
    temp_X = X[:, shuffle]
    temp_Y = Y[:, shuffle]
    num_complete_minibatches = int(np.floor(m / minibatch_size))
    mini_batches = []
    for batch in range(num_complete_minibatches):
        mini_batches.append((temp_X[:, batch*minibatch_size: (batch+1)*minibatch_size],
                             temp_Y[:, batch*minibatch_size: (batch+1)*minibatch_size]))
    mini_batches.append((temp_X[:, num_complete_minibatches*minibatch_size:],
                         temp_Y[:, num_complete_minibatches*minibatch_size:]))
    return mini_batches
def model(X, Y, layers_dim, learning_rate=0.001, num_epochs=20, minibatch_size=64):
    tf.reset_default_graph()
    costs = []
    xph, yph = createPlaceholders()
    weights = init_param(layers_dim)
    ZL = forward_prop(xph, len(layers_dim), weights)
    cost = compute_cost(ZL, yph)
    optimiser = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(num_epochs):
            minibatches = randomMiniBatches(X, Y, minibatch_size)
            epoch_cost = 0
            for b, mini in enumerate(minibatches, 1):
                mini_x, mini_y = mini
                _, c = sess.run([optimiser, cost], feed_dict={xph: mini_x, yph: mini_y})
                epoch_cost += c
            print('epoch: ', epoch+1, '/ ', num_epochs)
            epoch_cost /= len(minibatches)
            costs.append(epoch_cost)
    plt.plot(costs)
    print(costs)
X_train = mnist.train.images.T
n_x = X_train.shape[0]
Y_train = mnist.train.labels.T
n_y = Y_train.shape[0]
layers_dim = [n_x,10,n_y]
model(X_train, Y_train, layers_dim)

Without going too much into how you draw the mini-batches: I think the problem is that you define axis 1 of xph and yph as the batch dimension (and feed accordingly), while ops such as tf.nn.softmax_cross_entropy_with_logits expect axis 0 to be the batch dimension, as is usually done; the softmax and the cross-entropy are taken along the last axis.
So your cross-entropy is actually computed along the batch dimension instead of across the 10 classes, which does not make sense.
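One minimal way to patch this while keeping the column-major layout used everywhere else in the code is to transpose just before the cross-entropy, so that the class dimension ends up on the last axis. This is only a sketch of that idea (TF 1.x API assumed), not necessarily the cleanest fix:
def compute_cost(ZL, Y):
    # ZL and Y arrive as (10, batch); put the batch on axis 0 so that the
    # softmax/cross-entropy is taken over the 10 classes, not over the batch.
    logits = tf.transpose(ZL)                      # (batch, 10)
    labels = tf.transpose(tf.cast(Y, tf.float32))  # (batch, 10)
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))
    return cost
With that change the per-example losses are averaged over the batch as intended.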

Related

Converting Tensorflow code to Pytorch - performance metrics very different

I have converted TensorFlow code for time-series analysis to PyTorch, and the performance difference is very large; in fact, the PyTorch layers cannot account for seasonality at all. It feels like I must be missing something important.
Please help me find where the PyTorch code is lacking, such that the learning is not up to par. I noticed that the loss has large jumps when it encounters the season change and is not learning that. With the same layers, nodes and everything else, I expected the performance to be close.
# tensorflow code
window_size = 20
batch_size = 32
shuffle_buffer_size = 1000
def windowed_dataset(series, window_size, batch_size, shuffle_buffer):
    dataset = tf.data.Dataset.from_tensor_slices(series)
    dataset = dataset.window(window_size + 1, shift=1, drop_remainder=True)
    dataset = dataset.flat_map(lambda window: window.batch(window_size + 1))
    dataset = dataset.shuffle(shuffle_buffer).map(lambda window: (window[:-1], window[-1]))
    dataset = dataset.batch(batch_size).prefetch(1)
    return dataset

dataset = windowed_dataset(x_train, window_size, batch_size, shuffle_buffer_size)

model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(100, input_shape=[window_size], activation="relu"),
    tf.keras.layers.Dense(10, activation="relu"),
    tf.keras.layers.Dense(1)
])
model.compile(loss="mse", optimizer=tf.keras.optimizers.SGD(lr=1e-6, momentum=0.9))
model.fit(dataset, epochs=100, verbose=0)

forecast = []
for time in range(len(series) - window_size):
    forecast.append(model.predict(series[time:time + window_size][np.newaxis]))
forecast = forecast[split_time-window_size:]
results = np.array(forecast)[:, 0, 0]

plt.figure(figsize=(10, 6))
plot_series(time_valid, x_valid)
plot_series(time_valid, results)

tf.keras.metrics.mean_absolute_error(x_valid, results).numpy()
# pytorch code
window_size = 20
batch_size = 32
shuffle_buffer_size = 1000
class tsdataset(Dataset):
    def __init__(self, series, window_size):
        self.series = series
        self.window_size = window_size
        self.dataset, self.labels = self.preprocess()

    def preprocess(self):
        series = self.series
        final, labels = [], []
        for i in range(len(series)-self.window_size):
            final.append(np.array(series[i:i+window_size]))
            labels.append(np.array(series[i+window_size]))
        return torch.from_numpy(np.array(final)), torch.from_numpy(np.array(labels))

    def __getitem__(self, index):
        # print(self.dataset[index], self.labels[index], index)
        return self.dataset[index], self.labels[index]

    def __len__(self):
        return len(self.dataset)

train_dataset = tsdataset(x_train, window_size)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

class tspredictor(nn.Module):
    def __init__(self, window_size, out1, out2, out3):
        super(tspredictor, self).__init__()
        self.l1 = nn.Linear(window_size, out1)
        self.l2 = nn.Linear(out1, out2)
        self.l3 = nn.Linear(out2, out3)

    def forward(self, seq):
        l1 = F.relu(self.l1(seq))
        l2 = F.relu(self.l2(l1))
        l3 = self.l3(l2)
        return l3

model = tspredictor(20, 100, 10, 1)
loss_function = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=1e-6, momentum=0.9)

for epoch in range(100):
    for t, l in train_dataloader:
        model.zero_grad()
        tag_scores = model(t)
        loss = loss_function(tag_scores, l)
        loss.backward()
        optimizer.step()
    # print("Epoch is {}, loss is {}".format(epoch, loss.data))

forecast = []
for time in range(len(series) - window_size):
    prediction = model(torch.from_numpy(series[time:time + window_size][np.newaxis]))
    forecast.append(prediction)
forecast = forecast[split_time-window_size:]
results = np.array(forecast)

plt.figure(figsize=(10, 6))
plot_series(time_valid, x_valid)
plot_series(time_valid, results)
To generate data, you can use:
def plot_series(time, series, format="-", start=0, end=None):
    plt.plot(time[start:end], series[start:end], format)
    plt.xlabel("Time")
    plt.ylabel("Value")
    plt.grid(False)

def trend(time, slope=0):
    return slope * time

def seasonal_pattern(season_time):
    """Just an arbitrary pattern, you can change it if you wish"""
    return np.where(season_time < 0.1,
                    np.cos(season_time * 6 * np.pi),
                    2 / np.exp(9 * season_time))

def seasonality(time, period, amplitude=1, phase=0):
    """Repeats the same pattern at each period"""
    season_time = ((time + phase) % period) / period
    return amplitude * seasonal_pattern(season_time)

def noise(time, noise_level=1, seed=None):
    rnd = np.random.RandomState(seed)
    return rnd.randn(len(time)) * noise_level

time = np.arange(10 * 365 + 1, dtype="float32")
baseline = 10
series = trend(time, 0.1)
baseline = 10
amplitude = 40
slope = 0.005
noise_level = 3

# Create the series
series = baseline + trend(time, slope) + seasonality(time, period=365, amplitude=amplitude)
# Update with noise
series += noise(time, noise_level, seed=51)

split_time = 3000
time_train = time[:split_time]
x_train = series[:split_time]
time_valid = time[split_time:]
x_valid = series[split_time:]
There was a broadcasting issue in the loss function: the model output has shape (batch, 1) while the labels coming out of the DataLoader have shape (batch,), so MSELoss broadcasts the two against each other. Changing the loss to the one below fixes it:
loss = loss_function(tag_scores, l.view(-1,1))
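To see why this matters, here is a small shape-only sketch (my addition; the values are arbitrary):
import torch

pred = torch.zeros(32, 1)    # model output: (batch, 1)
target = torch.zeros(32)     # labels from the DataLoader: (batch,)

# (32, 1) and (32,) broadcast to (32, 32), so every prediction is compared
# against every label instead of element-wise.
print((pred - target).shape)              # torch.Size([32, 32])

# After reshaping the target, the difference is element-wise as intended.
print((pred - target.view(-1, 1)).shape)  # torch.Size([32, 1])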

How to find accuracy for logistic regression and gradient descent with training and validation data sets?

I am trying to implement logistic regression with gradient descent on the notMNIST dataset. This is my code thus far, which parses the data and plots the accuracy against the epochs. I have done my training in 7 mini batches of 500 each. There are a total of 5000 iterations and therefore 5000/7 epochs.
My goal is to find the accuracy after each epoch and plot it against the epoch. And I want to do the same with the average loss at each epoch. I want to do this for the validation points.
This is the loss function I am implementing.
However, for some reason, when I try to calculate the accuracy I always get 100%, which doesn't make sense, since I learn the weights on the training set and then apply them to the validation set, so the algorithm cannot be correct 100% of the time. Also, when I plot the losses, I get a linear function, which doesn't make any sense either.
Does anyone have ideas about what I am doing wrong? Any help would be appreciated!
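For reference, binary accuracy for a sigmoid classifier is usually computed by thresholding the predicted probability at 0.5 rather than comparing raw logits to the labels. A minimal TF 1.x sketch (my addition, using the y_hat logits and the y placeholder defined in the code below):
prob = tf.sigmoid(y_hat)                                 # (batch, 1) probabilities
predicted_class = tf.cast(prob > 0.5, tf.float32)        # threshold at 0.5
correct = tf.equal(predicted_class, y)                   # element-wise comparison
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))  # fraction correct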
#implement logistic regression
#logistic regression prediction function is y = sigmoid(W^Tx + b)
#train the logistic regression model using SGD and mini batch size B = 500 on the two-class notNMIST dataset
#how to train the dataset:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
##############Constants##################################
BATCH_SIZE = 500
NUM_BATCHES = 7
NUM_ITERATIONS = 5000
LEARNING_RATE = [0.005]  # 0.001, 0.0001
PIXEL_SIZE = 784  # 28x28
NUM_TRAINING_POINTS = 3500
NUM_VALID_POINTS = 100
###############Extracting data############################
with np.load("notMNIST.npz") as data:
    Data, Target = data["images"], data["labels"]
    posClass = 2
    negClass = 9
    dataIndx = (Target == posClass) + (Target == negClass)
    Data = Data[dataIndx] / 255.
    Target = Target[dataIndx].reshape(-1, 1)
    Target[Target == posClass] = 1
    Target[Target == negClass] = 0
    np.random.seed(521)
    randIndx = np.arange(len(Data))
    np.random.shuffle(randIndx)
    Data, Target = Data[randIndx], Target[randIndx]
    trainData, trainTarget = Data[:3500], Target[:3500]
    validData, validTarget = Data[3500:3600], Target[3500:3600]
    testData, testTarget = Data[3600:], Target[3600:]
################Manipulating Data##########################
trainX = np.reshape(trainData, (NUM_TRAINING_POINTS, PIXEL_SIZE))
validX = np.reshape(validData, (NUM_VALID_POINTS, PIXEL_SIZE))
batchesX = np.array(np.split(trainX, NUM_BATCHES))
batchesY = np.array(np.split(trainTarget, NUM_BATCHES))
################Defining variables########################
loss_Values = [[0 for x in range(NUM_BATCHES)] for y in range(715)]
lr = dict()
epoch_list = []
mean_list = []
accuracy_list = []
x = tf.placeholder(tf.float32, [PIXEL_SIZE, None], name = "input_points") #784 dimensions (28x28 pixels)
W = tf.Variable(tf.truncated_normal(shape=[PIXEL_SIZE,1], stddev=0.5), name='weights')
b = tf.Variable(0.0, name='bias')
y = tf.placeholder(tf.float32, [None,1], name = "target_labels")#target labels
lambda_ = 0.01
##############Calculations###############################
#weight_squared_sum = tf.matmul(tf.transpose(W),W) #find the square of the weight vector
#calculating the bias term
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    weight = W.eval()
    weight_squared_sum = np.linalg.norm(weight)
    loss_W = lambda_ / 2 * weight_squared_sum  # find the loss
    y_hat = tf.add(tf.matmul(tf.transpose(W), x), b)  # based on the sigmoid equation
    y_hat = tf.transpose(y_hat)
    cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_hat, labels=y)  # sigmoid_cross_entropy_with_logits takes in the actual y and the predicted y
    total_loss = tf.add(tf.reduce_mean(cross_entropy, 0), loss_W)
#############Training######################################
epoch = 0
with tf.Session() as sess:
    epoch = 0
    tf.global_variables_initializer().run()
    for learning_rate in LEARNING_RATE:
        train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(total_loss)  # change the learning rate each time
        for i in range(NUM_BATCHES*NUM_ITERATIONS):
            sess.run(train_step, feed_dict={x: np.transpose(batchesX[i % NUM_BATCHES]), y: batchesY[i % NUM_BATCHES]})
            print("i: ", i)
            print("LOSS:")
            print(sess.run(total_loss, feed_dict={x: np.transpose(batchesX[i % NUM_BATCHES]), y: batchesY[i % NUM_BATCHES]}))
            if i % NUM_BATCHES == 0:  # everytime we reach 0, a new epoch has started
                loss_Values[epoch][i % NUM_BATCHES] = sess.run(cross_entropy, feed_dict={x: np.transpose(batchesX[i % NUM_BATCHES]), y: batchesY[i % NUM_BATCHES]})
                correct_prediction = tf.equal(y, y_hat)
                accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
                accuracy_val = sess.run(accuracy, feed_dict={x: np.transpose(validX), y: validTarget})
                print("Accuracy: ", accuracy_val)
                accuracy_list.append(accuracy_val)
                epoch = epoch + 1
        lr[learning_rate] = loss_Values
print("Final value")
#for plotting purposes
N = len(loss_Values)
for epoch in range(N):  # find average over all input points in one epoch
    epoch_list.append(epoch)
    row = np.array(loss_Values[epoch])
    mean = np.add.reduce(row) / 3500
    mean_list.append(mean)
epoch_list = np.array(epoch_list)
mean_list = np.array(epoch_list)
accuracy_list = np.array(epoch_list)
plt.figure()
plt.plot(epoch_list, accuracy_list, '-', label='Average loss')
plt.show()

Cost-sensitive loss function in Tensorflow

I'm doing research on cost-sensitive neural networks based on TensorFlow, but because of TensorFlow's static graph structure I couldn't realise some network structures by myself.
My loss function (cost), cost matrix and the computational process are described below, and my goal is to compute the total cost and then optimise the network.
Approximate computational process:
y_ is the last fully-connected output of a CNN and has shape (1024, 5);
y is a tensor of shape (1024) that indicates the ground truth of x[i];
y_soft[i][j] indicates the probability of x[i] being class j.
How can I realise this in TensorFlow?
cost_matrix:
[[0,1,100],
[1,0,1],
[1,20,0]]
label:
[1,2]
y*:
[[0,1,0],
[0,0,1]]
y(prediction):
[[0.2,0.3,0.5],
[0.1,0.2,0.7]]
label,cost_matrix-->cost_embedding:
[[1,0,1],
[1,20,0]]
It is obvious that the 0.3 in [0.2,0.3,0.5] refers to the probability of the correct label [0,1,0], so it should not contribute to the loss.
The 0.7 in [0.1,0.2,0.7] is the same. In other words, the positions with value 1 in y* do not contribute to the loss.
So I have (1-y*):
[[1,0,1],
[1,1,0]]
Then the cross entropy is target*log(predict) + (1-target)*log(1-predict), and for the positions where y* is 0 we should use the (1-target)*log(1-predict) term, so I use (1-predict), written (1-y).
1-y:
[[0.8,*0.7*,0.5],
[0.9,0.8,*0.3*]]
(the italicised numbers are unused)
The custom loss is
[[1,0,1],[1,20,0]] * log([[0.8,0.7,0.5],[0.9,0.8,0.3]]) * [[1,0,1],[1,1,0]]
and you can see that the (1-y*) factor can be dropped here,
so the loss is -tf.reduce_mean(cost_embedding*log(1-y)).
To make it numerically stable, it should be:
-tf.reduce_mean(cost_embedding*tf.log(tf.clip_by_value(1-y, 1e-10, 1.0)))
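As a quick sanity check of the arithmetic above, here is a small NumPy sketch (my addition) using the example matrices from this answer:
import numpy as np

cost_embedding = np.array([[1, 0, 1],
                           [1, 20, 0]], dtype=np.float32)    # cost_matrix rows selected by the labels [1, 2]
one_minus_y = np.array([[0.8, 0.7, 0.5],
                        [0.9, 0.8, 0.3]], dtype=np.float32)  # 1 - predicted probabilities

# Positions with cost 0 (the true classes) drop out automatically,
# so the explicit (1 - y*) mask is not needed.
loss = -np.mean(cost_embedding * np.log(one_minus_y))
print(loss)  # ~0.914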
The demo is below:
import tensorflow as tf
import numpy as np

hidden_units = 50
num_class = 3

class Model():
    def __init__(self, name_scope, is_custom):
        self.name_scope = name_scope
        self.is_custom = is_custom
        self.input_x = tf.placeholder(tf.float32, [None, hidden_units])
        self.input_y = tf.placeholder(tf.int32, [None])
        self.instantiate_weights()
        self.logits = self.inference()
        self.predictions = tf.argmax(self.logits, axis=1)
        self.losses, self.train_op = self.opitmizer()

    def instantiate_weights(self):
        with tf.variable_scope(self.name_scope + 'FC'):
            self.W = tf.get_variable('W', [hidden_units, num_class])
            self.b = tf.get_variable('b', [num_class])
            self.cost_matrix = tf.constant(
                np.array([[0, 1, 100], [1, 0, 100], [20, 5, 0]]),
                dtype=tf.float32
            )

    def inference(self):
        return tf.matmul(self.input_x, self.W) + self.b

    def opitmizer(self):
        if not self.is_custom:
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.input_y, logits=self.logits)
        else:
            batch_cost_matrix = tf.nn.embedding_lookup(
                self.cost_matrix, self.input_y
            )
            loss = - tf.log(1 - tf.nn.softmax(self.logits)) * batch_cost_matrix
        train_op = tf.train.AdamOptimizer().minimize(loss)
        return loss, train_op
import random
batch_size = 128
norm_model = Model('norm',False)
custom_model = Model('cost',True)
split_point = int(0.9 * dataset_size)
train_set = datasets[:split_point]
test_set = datasets[split_point:]
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(100):
        batch_index = random.sample(range(split_point), batch_size)
        train_batch = train_set[batch_index]
        train_labels = lables[batch_index]
        _, eval_predict, eval_loss = sess.run(
            [norm_model.train_op, norm_model.predictions, norm_model.losses],
            feed_dict={
                norm_model.input_x: train_batch,
                norm_model.input_y: train_labels
            })
        _, eval_predict1, eval_loss1 = sess.run(
            [custom_model.train_op, custom_model.predictions, custom_model.losses],
            feed_dict={
                custom_model.input_x: train_batch,
                custom_model.input_y: train_labels
            })
        # print 'norm', eval_predict, '\ncustom', eval_predict1
        print np.sum(((eval_predict == train_labels) == True).astype(np.int)), \
            np.sum(((eval_predict1 == train_labels) == True).astype(np.int))
        if i % 10 == 0:
            print 'norm_test', sess.run(norm_model.predictions,
                feed_dict={
                    norm_model.input_x: test_set,
                    norm_model.input_y: lables[split_point:]
                })
            print 'custom_test', sess.run(custom_model.predictions,
                feed_dict={
                    custom_model.input_x: test_set,
                    custom_model.input_y: lables[split_point:]
                })

my deep learning code's accuracy for solving xor logic is just 50%

import tensorflow as tf
import numpy as np
#Constant Declaration
LEARNING_RATE = 0.05
LEARNING_TIME = 10000
FILE_NAME = 'xor'
# Input Data Declaration
xy = np.loadtxt(FILE_NAME+'_data_set.txt',unpack=True,dtype='float32',delimiter=',')
x_data = np.transpose(xy[0:-1])
y_data = np.transpose(xy[-1])
print x_data
# Declaration Part
X = tf.placeholder(dtype = tf.float32,name="X-input")
Y = tf.placeholder(dtype = tf.float32,name="Y-input")
W1 = tf.Variable(tf.random_uniform([2,2],-1.0,1.0), name="Weight_1")
W2 = tf.Variable(tf.random_uniform([2,1],-1.0,1.0), name="Weight_2")
b1 = tf.Variable(tf.zeros([2]), name = 'Bias1')
b2 = tf.Variable(tf.zeros([1]), name = 'Bias2')
# Formula Part
with tf.name_scope("Layer1") as scope:
L1 = tf.sigmoid(tf.matmul(X,W1) + b1)
with tf.name_scope("Layer2") as scope:
hypothesis = tf.sigmoid(tf.matmul(L1,W2) + b2)
with tf.name_scope("Cost") as scope:
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))
cost_summ = tf.scalar_summary("cost",cost)
# Minimizing Part
a = tf.Variable(LEARNING_RATE)
with tf.name_scope("train") as scope:
optimizer = tf.train.GradientDescentOptimizer(a)
train = optimizer.minimize(cost)
# Initializing Part
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
merged = tf.merge_all_summaries()
writer = tf.train.SummaryWriter('./logs/'+FILE_NAME,sess.graph_def)
# Running Part
for step in range(LEARNING_TIME):
    sess.run(train, feed_dict={X: x_data, Y: y_data})
    if step % 20 == 0:
        print step, sess.run(cost, feed_dict={X: x_data, Y: y_data}), sess.run(W2)
        summary = sess.run(merged, feed_dict={X: x_data, Y: y_data})
        writer.add_summary(summary, step)
#Test Part
correction = tf.equal(tf.floor(hypothesis + 0.5),Y)
accuracy = tf.reduce_mean(tf.cast(correction,'float'))
print sess.run([hypothesis,tf.floor(hypothesis + 0.5),correction,accuracy], feed_dict = {X:x_data,Y:y_data})
Above is my TensorFlow code for solving XOR logic. The problem is that the accuracy is just 50 percent, and the cost converges to 0.69321.
I have actually seen lots of XOR implementations in TensorFlow, but I can't find what is wrong with mine.
Below are images showing how my code works.
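For what it's worth, 0.69321 is essentially ln 2 ≈ 0.6931, which is exactly the cross-entropy you get when the network outputs a constant 0.5 for every example. A quick check (my addition, using the same cost formula as the code above):
import numpy as np

y = np.array([0., 1., 1., 0.])   # XOR targets
p = np.full_like(y, 0.5)         # a network stuck at 0.5 for every input
print(-np.mean(y * np.log(p) + (1 - y) * np.log(1 - p)))  # 0.6931...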

Implementing a simple siamese network on Tensorflow

I want to implement a Siamese MLP network using the MNIST dataset.
I built my code based on the Keras mnist_siamese_graph example, but the loss value and accuracy are very different compared to the Keras version.
I cannot figure out where the problems are.
This is my code:
import random
import numpy as np
import time
import tensorflow as tf
import input_data
mnist = input_data.read_data_sets("/tmp/data",one_hot=False)
import pdb
def create_pairs(x, digit_indices):
    '''Positive and negative pair creation.
    Alternates between positive and negative pairs.
    '''
    pairs = []
    labels = []
    n = min([len(digit_indices[d]) for d in range(10)]) - 1
    for d in range(10):
        for i in range(n):
            z1, z2 = digit_indices[d][i], digit_indices[d][i+1]
            pairs += [[x[z1], x[z2]]]
            inc = random.randrange(1, 10)
            dn = (d + inc) % 10
            z1, z2 = digit_indices[d][i], digit_indices[dn][i]
            pairs += [[x[z1], x[z2]]]
            labels += [1, 0]
    return np.array(pairs), np.array(labels)
def mlp(input_, input_dim, output_dim, name="mlp"):
    with tf.variable_scope(name):
        w = tf.get_variable('w', [input_dim, output_dim], tf.float32, tf.random_normal_initializer())
        return tf.nn.relu(tf.matmul(input_, w))

def build_model_mlp(X_, _dropout):
    model = mlpnet(X_, _dropout)
    return model

def mlpnet(image, _dropout):
    l1 = mlp(image, 784, 128, name='l1')
    l1 = tf.nn.dropout(l1, _dropout)
    l2 = mlp(l1, 128, 128, name='l2')
    l2 = tf.nn.dropout(l2, _dropout)
    l3 = mlp(l2, 128, 128, name='l3')
    return l3

def contrastive_loss(y, d):
    tmp = y * tf.square(d)
    # tmp = tf.mul(y, tf.square(d))
    tmp2 = (1 - y) * tf.square(tf.maximum((1 - d), 0))
    return tf.reduce_sum(tmp + tmp2) / batch_size / 2

def compute_accuracy(prediction, labels):
    return labels[prediction.ravel() < 0.5].mean()
    # return tf.reduce_mean(labels[prediction.ravel() < 0.5])

def next_batch(s, e, inputs, labels):
    input1 = inputs[s:e, 0]
    input2 = inputs[s:e, 1]
    y = np.reshape(labels[s:e], (len(range(s, e)), 1))
    return input1, input2, y
# Initializing the variables
init = tf.initialize_all_variables()
# the data, shuffled and split between train and test sets
X_train = mnist.train._images
y_train = mnist.train._labels
X_test = mnist.validation._images
y_test = mnist.validation._labels
batch_size =128
# create training+test positive and negative pairs
digit_indices = [np.where(y_train == i)[0] for i in range(10)]
tr_pairs, tr_y = create_pairs(X_train, digit_indices)
digit_indices = [np.where(y_test == i)[0] for i in range(10)]
te_pairs, te_y = create_pairs(X_test, digit_indices)
images_L = tf.placeholder(tf.float32,shape=([None,784]),name='L')
images_R = tf.placeholder(tf.float32,shape=([None,784]),name='R')
labels = tf.placeholder(tf.float32,shape=([None,1]),name='gt')
dropout_f = tf.placeholder("float")
with tf.variable_scope("siamese") as scope:
model1= build_model_mlp(images_L,dropout_f)
scope.reuse_variables()
model2 = build_model_mlp(images_R,dropout_f)
distance = tf.sqrt(tf.reduce_sum(tf.pow(tf.sub(model1,model2),2),1,keep_dims=True))
loss = contrastive_loss(labels,distance)
# contrastive loss
t_vars = tf.trainable_variables()
d_vars = [var for var in t_vars if 'l' in var.name]
batch = tf.Variable(0)
optimizer = tf.train.RMSPropOptimizer(0.001,momentum=0.9,epsilon=1e-6).minimize(loss)
# Launch the graph
with tf.Session() as sess:
    # sess.run(init)
    tf.initialize_all_variables().run()
    # Training cycle
    for epoch in range(40):
        print('epoch %d' % epoch)
        avg_loss = 0.
        avg_acc = 0.
        total_batch = int(X_train.shape[0] / batch_size)
        start_time = time.time()
        # Loop over all batches
        for i in range(total_batch):
            s = i * batch_size
            e = (i+1) * batch_size
            # Fit training using batch data
            input1, input2, y = next_batch(s, e, tr_pairs, tr_y)
            _, loss_value, predict = sess.run([optimizer, loss, distance], feed_dict={images_L: input1, images_R: input2, labels: y, dropout_f: 0.9})
            tr_acc = compute_accuracy(predict, y)
            avg_loss += loss_value
            avg_acc += tr_acc * 100
        # print('epoch %d loss %0.2f' % (epoch, avg_loss/total_batch))
        duration = time.time() - start_time
        print('epoch %d time: %f loss %0.2f acc %0.2f' % (epoch, duration, avg_loss/(total_batch), avg_acc/total_batch))
    y = np.reshape(tr_y, (tr_y.shape[0], 1))
    predict = distance.eval(feed_dict={images_L: tr_pairs[:, 0], images_R: tr_pairs[:, 1], labels: y, dropout_f: 1.0})
    tr_acc = compute_accuracy(predict, y)
    print('Accuracy training set %0.2f' % (100 * tr_acc))