Implementing a simple Siamese network in TensorFlow - tensorflow

I want to implement a Siamese MLP network on the MNIST dataset.
I built my code based on the Keras mnist_siamese_graph example, but the loss and accuracy values are very large compared to the Keras version.
I cannot figure out where the problem is.
This is my code:
import random
import numpy as np
import time
import tensorflow as tf
import input_data
mnist = input_data.read_data_sets("/tmp/data", one_hot=False)
import pdb

def create_pairs(x, digit_indices):
    '''Positive and negative pair creation.
    Alternates between positive and negative pairs.
    '''
    pairs = []
    labels = []
    n = min([len(digit_indices[d]) for d in range(10)]) - 1
    for d in range(10):
        for i in range(n):
            z1, z2 = digit_indices[d][i], digit_indices[d][i + 1]
            pairs += [[x[z1], x[z2]]]
            inc = random.randrange(1, 10)
            dn = (d + inc) % 10
            z1, z2 = digit_indices[d][i], digit_indices[dn][i]
            pairs += [[x[z1], x[z2]]]
            labels += [1, 0]
    return np.array(pairs), np.array(labels)

def mlp(input_, input_dim, output_dim, name="mlp"):
    with tf.variable_scope(name):
        w = tf.get_variable('w', [input_dim, output_dim], tf.float32, tf.random_normal_initializer())
        return tf.nn.relu(tf.matmul(input_, w))

def build_model_mlp(X_, _dropout):
    model = mlpnet(X_, _dropout)
    return model

def mlpnet(image, _dropout):
    l1 = mlp(image, 784, 128, name='l1')
    l1 = tf.nn.dropout(l1, _dropout)
    l2 = mlp(l1, 128, 128, name='l2')
    l2 = tf.nn.dropout(l2, _dropout)
    l3 = mlp(l2, 128, 128, name='l3')
    return l3

def contrastive_loss(y, d):
    tmp = y * tf.square(d)
    #tmp = tf.mul(y, tf.square(d))
    tmp2 = (1 - y) * tf.square(tf.maximum((1 - d), 0))
    return tf.reduce_sum(tmp + tmp2) / batch_size / 2

def compute_accuracy(prediction, labels):
    return labels[prediction.ravel() < 0.5].mean()
    #return tf.reduce_mean(labels[prediction.ravel() < 0.5])

def next_batch(s, e, inputs, labels):
    input1 = inputs[s:e, 0]
    input2 = inputs[s:e, 1]
    y = np.reshape(labels[s:e], (len(range(s, e)), 1))
    return input1, input2, y

# Initializing the variables
init = tf.initialize_all_variables()
# the data, shuffled and split between train and test sets
X_train = mnist.train._images
y_train = mnist.train._labels
X_test = mnist.validation._images
y_test = mnist.validation._labels
batch_size = 128
# create training+test positive and negative pairs
digit_indices = [np.where(y_train == i)[0] for i in range(10)]
tr_pairs, tr_y = create_pairs(X_train, digit_indices)
digit_indices = [np.where(y_test == i)[0] for i in range(10)]
te_pairs, te_y = create_pairs(X_test, digit_indices)
images_L = tf.placeholder(tf.float32, shape=([None, 784]), name='L')
images_R = tf.placeholder(tf.float32, shape=([None, 784]), name='R')
labels = tf.placeholder(tf.float32, shape=([None, 1]), name='gt')
dropout_f = tf.placeholder("float")
with tf.variable_scope("siamese") as scope:
    model1 = build_model_mlp(images_L, dropout_f)
    scope.reuse_variables()
    model2 = build_model_mlp(images_R, dropout_f)
distance = tf.sqrt(tf.reduce_sum(tf.pow(tf.sub(model1, model2), 2), 1, keep_dims=True))
loss = contrastive_loss(labels, distance)
# contrastive loss
t_vars = tf.trainable_variables()
d_vars = [var for var in t_vars if 'l' in var.name]
batch = tf.Variable(0)
optimizer = tf.train.RMSPropOptimizer(0.001, momentum=0.9, epsilon=1e-6).minimize(loss)
# Launch the graph
with tf.Session() as sess:
    #sess.run(init)
    tf.initialize_all_variables().run()
    # Training cycle
    for epoch in range(40):
        print('epoch %d' % epoch)
        avg_loss = 0.
        avg_acc = 0.
        total_batch = int(X_train.shape[0] / batch_size)
        start_time = time.time()
        # Loop over all batches
        for i in range(total_batch):
            s = i * batch_size
            e = (i + 1) * batch_size
            # Fit training using batch data
            input1, input2, y = next_batch(s, e, tr_pairs, tr_y)
            _, loss_value, predict = sess.run([optimizer, loss, distance], feed_dict={images_L: input1, images_R: input2, labels: y, dropout_f: 0.9})
            tr_acc = compute_accuracy(predict, y)
            avg_loss += loss_value
            avg_acc += tr_acc * 100
        #print('epoch %d loss %0.2f' % (epoch, avg_loss / total_batch))
        duration = time.time() - start_time
        print('epoch %d time: %f loss %0.2f acc %0.2f' % (epoch, duration, avg_loss / total_batch, avg_acc / total_batch))
    y = np.reshape(tr_y, (tr_y.shape[0], 1))
    predict = distance.eval(feed_dict={images_L: tr_pairs[:, 0], images_R: tr_pairs[:, 1], labels: y, dropout_f: 1.0})
    tr_acc = compute_accuracy(predict, y)
    print('Accuracy on training set %0.2f' % (100 * tr_acc))
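For reference, here is a NumPy-only sanity check of the contrastive loss and the accuracy metric used above (added purely for illustration; the toy distances and pair labels are made up):
import numpy as np

def contrastive_loss_np(y, d, batch_size):
    # y = 1 for similar pairs, 0 for dissimilar pairs; d = Euclidean distance between embeddings
    tmp = y * np.square(d)
    tmp2 = (1 - y) * np.square(np.maximum(1 - d, 0))
    return np.sum(tmp + tmp2) / batch_size / 2

def compute_accuracy_np(prediction, labels):
    # mean label of the pairs predicted "similar" (distance < 0.5), as in compute_accuracy above
    return labels[prediction.ravel() < 0.5].mean()

d = np.array([0.1, 0.9, 0.2, 1.4])   # hypothetical distances
y = np.array([1.0, 0.0, 1.0, 0.0])   # ground-truth pair labels
print(contrastive_loss_np(y, d, batch_size=4))   # small loss: positives are close, negatives are far
print(compute_accuracy_np(d, y))                 # 1.0: both pairs with d < 0.5 are true positives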

Related

TensorFlow training with large dataset takes too long

Yesterday I created a pretrained VGG19 with a custom head and tried to train it on 60,000 images. After more than 12 hours, the first epoch had still not finished.
The batch size was set to 64 and the number of steps per epoch to training_set_size / batch_size.
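For concreteness, with the numbers above and the 0.8 train fraction used as the default in the DataLoader shown below, one epoch works out to roughly the following step counts (a back-of-the-envelope sketch, not from the original post):
# Rough steps-per-epoch arithmetic, mirroring DataLoader.steps_per_epoch below
dataset_size = 60000
batch_size = 64
training_set_size = 0.8
steps_per_epoch = int(dataset_size * training_set_size // batch_size)        # 750
validation_steps = int(dataset_size * (1 - training_set_size) // batch_size)  # 187
print(steps_per_epoch, validation_steps)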
Below is the code of DataLoader:
IMAGE_CHANNEL = 3
def crop(image, margin):
return image[margin:-margin, margin:-margin]
def random_rotation(image, angle):
M = cv2.getRotationMatrix2D((0, 0),angle,1)
rows,cols, _ = image.shape
new_img = cv2.warpAffine(image, M, (cols, rows))
return new_img
def get_generator(in_gen, should_augment=True):
weights = None
if should_augment:
image_gen = tf.keras.preprocessing.image.ImageDataGenerator(fill_mode='reflect',
data_format='channels_last',
brightness_range=[0.5, 1.5])
else:
image_gen = tf.keras.preprocessing.image.ImageDataGenerator(fill_mode='reflect',
data_format='channels_last',
brightness_range=[1, 1])
for items in in_gen:
in_x, in_y = items
g_x = image_gen.flow(255 * in_x, in_y, batch_size=in_x.shape[0])
x, y = next(g_x)
yield x / 255.0, y
class DataLoader:
def __init__(self, source_filename, dataset_path, image_size, batch_size, training_set_size=0.8, sample_size=None):
path_dataset = Path(dataset_path)
path_image_folders = path_dataset / 'images'
self.data = pd.read_pickle(source_filename)
if sample_size is not None:
self.data = self.data[:sample_size]
self.image_size = image_size
self.batch_size = batch_size
self.training_set_size = training_set_size
self.steps_per_epoch = int(self.data.shape[0] * training_set_size // batch_size)
if self.steps_per_epoch == 0: self.steps_per_epoch = 1
self.validation_steps = int(self.data.shape[0] * (1 - training_set_size)//batch_size)
if self.validation_steps == 0: self.validation_steps = 1
def draw_idx(self, i):
img_path = self.data.iloc[i].image
img = tf.keras.preprocessing.image.img_to_array(tf.keras.preprocessing.image.load_img(str(img_path)))
# print(img.shape)
height, width, _ = img.shape
fig = plt.figure(figsize=(15, 15), facecolor='w')
# original image
ax = fig.add_subplot(1, 1, 1)
ax.imshow(img / 255.0)
openness = self.data.iloc[i].Openness
conscientiousness = self.data.iloc[i].Conscientiousness
extraversion = self.data.iloc[i].Extraversion
agreeableness = self.data.iloc[i].Agreeableness
neuroticism = self.data.iloc[i].Neuroticism
ax.title.set_text(
f'O: {openness}, C: {conscientiousness}, E: {extraversion}, A: {agreeableness}, N: {neuroticism}')
plt.axis('off')
plt.tight_layout()
plt.show()
def get_image(self, index, data, should_augment):
# Read image and appropiate landmarks
image = cv2.imread(data['image'].values[index])
h, w, _ = image.shape
o, c, e, a, n = data[['Openness', 'Conscientiousness', 'Extraversion', 'Agreeableness', 'Neuroticism']].values[
index]
should_flip = random.randint(0, 1)
should_rotate = random.randint(0, 1)
should_crop = random.randint(0, 1)
if should_augment:
if should_flip == 1:
# print("Image {} flipped".format(data['path'].values[index]))
image = cv2.flip(image, 1)
if should_rotate == 1:
angle = random.randint(-5, 5)
image = random_rotation(image, angle)
if should_crop == 1:
margin = random.randint(1, 10)
image = crop(image, margin)
image = cv2.resize(image, (self.image_size, self.image_size))
return [image, o, c, e, a, n]
def generator(self, data, should_augment=True):
while True:
# Randomize the indices to make an array
indices_arr = np.random.permutation(data.count()[0])
for batch in range(0, len(indices_arr), self.batch_size):
# slice out the current batch according to batch-size
current_batch = indices_arr[batch:(batch + self.batch_size)]
# initializing the arrays, x_train and y_train
x_train = np.empty(
[0, self.image_size, self.image_size, IMAGE_CHANNEL], dtype=np.float32)
y_train = np.empty([0, 5], dtype=np.int32)
for i in current_batch:
# get an image and its corresponding color for an traffic light
[image, o, c, e, a, n] = self.get_image(i, data, should_augment)
# Appending them to existing batch
x_train = np.append(x_train, [image], axis=0)
y_train = np.append(y_train, [[o, c, e, a, n]], axis=0)
# replace nan values with zeros
y_train = np.nan_to_num(y_train)
yield (x_train, y_train)
def get_training_and_test_generators(self, should_augment_training=True, should_augment_test=True):
msk = np.random.rand(len(self.data)) < self.training_set_size
train = self.data[msk]
test = self.data[~msk]
train_gen = self.generator(train, should_augment_training)
test_gen = self.generator(test, should_augment_test)
return get_generator(train_gen, should_augment_training), get_generator(test_gen, should_augment_test)
def show_batch_images_sample(self, images, landmarks, n_rows=3, n_cols=3):
assert n_rows * n_cols <= self.batch_size, "Number of expected images to display is larger than batch!"
fig = plt.figure(figsize=(15, 15))
xs, ys = [], []
count = 1
for img, y in zip(images, landmarks):
ax = fig.add_subplot(n_rows, n_cols, count)
ax.imshow(img)
h, w, _ = img.shape
o, c, e, a, n = y
ax.title.set_text(f'{o}, {c}, {e}, {a}, {n}')
ax.axis('off')
if count == n_rows * n_cols:
break
count += 1
class CallbackTensorboardImageOutput(Callback):
def __init__(self, model, generator, log_dir, feed_inputs_display=9):
# assert ((feed_inputs_display & (feed_inputs_display - 1)) == 0) and feed_inputs_display != 0
self.generator = generator
self.model = model
self.log_dir = log_dir
self.writer = tf.summary.create_file_writer(self.log_dir)
self.feed_inputs_display = feed_inputs_display
self.seen = 0
def plot_to_image(figure):
"""Converts the matplotlib plot specified by 'figure' to a PNG image and
returns it. The supplied figure is closed and inaccessible after this call."""
# Save the plot to a PNG in memory.
buf = io.BytesIO()
plt.savefig(buf, format='png')
# Closing the figure prevents it from being displayed directly inside
# the notebook.
plt.close(figure)
buf.seek(0)
# Convert PNG buffer to TF image
image = tf.image.decode_png(buf.getvalue(), channels=4)
# Add the batch dimension
image = tf.expand_dims(image, 0)
return image
@staticmethod
def get_loss(gt, predictions):
return tf.losses.mse(gt, predictions)
def on_epoch_end(self, epoch, logs={}):
self.seen += 1
if self.seen % 1 == 0:
items = next(self.generator)
images_to_display = self.feed_inputs_display
images_per_cell_count = int(math.sqrt(images_to_display))
# in case of regular model training using generator, an array is passed
if not isinstance(items, dict):
frames_arr, ocean_scores = items
# Take just 1st sample from batch
batch_size = frames_arr.shape[0]
if images_to_display > batch_size:
images_to_display = batch_size
frames_arr = frames_arr[0:images_to_display]
ocean_scores = ocean_scores[0:images_to_display]
y_pred = self.model.predict(frames_arr)
# in case of adversarial training, a dictionary is passed
else:
batch_size = items['feature'].shape[0]
if images_to_display > batch_size:
images_to_display = batch_size
# items['feature'] = items['feature'][0:images_to_display]
# landmarks = items['label'][0:images_to_display]
frames_arr = items['feature']
landmarks = items['label']
y_pred = self.model.predict(items)
figure = plt.figure(figsize=(15, 15))
for i in range(images_to_display):
image_current = frames_arr[i]
y_prediction_current = y_pred[i]
y_gt_current = ocean_scores[i]
lbl_prediction = 'plot/img/{}'.format(i)
ax = plt.subplot(images_per_cell_count, images_per_cell_count, i + 1, title=lbl_prediction)
ax.imshow(image_current)
ax.axis('off')
with self.writer.as_default():
tf.summary.image("Training Data", CallbackTensorboardImageOutput.plot_to_image(figure), step=self.seen)
Below is the definition of the network architecture and the call to the fit_generator function:
data_loader = dataloader.DataLoader('dataset.pkl', '/home/niko/data/PsychoFlickr', 224, 64)
train_gen, test_gen = data_loader.get_training_and_test_generators()
pre_trained_model = tf.keras.applications.VGG19(input_shape=(data_loader.image_size, data_loader.image_size, dataloader.IMAGE_CHANNEL), weights='imagenet', include_top=False)
x = pre_trained_model.output
x = tf.keras.layers.Flatten()(x)
# Add a fully connected layer with 256 hidden units and ReLU activation
x = tf.keras.layers.Dense(256)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation('relu')(x)
x = tf.keras.layers.Dropout(rate=0.5)(x)
x = tf.keras.layers.Dense(256)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation('relu')(x)
x = tf.keras.layers.Dropout(rate=0.5)(x)
x = tf.keras.layers.Dense(5, name='regresion_output')(x)
x = tf.keras.layers.Activation('linear')(x)
model = tf.keras.Model(pre_trained_model.input, x)
print(model.summary())
log_dir = "logs/{}".format(model_name)
model_filename = "saved-models/{}.h5".format(model_name)
cb_tensorboard = TensorBoard(log_dir=log_dir)
callback_save_images = dataloader.CallbackTensorboardImageOutput(model, test_gen, log_dir)
checkpoint = ModelCheckpoint(model_filename, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
lr = 1e-3
opt = tf.optimizers.Adam(lr=lr)
model.compile(loss=loss_mse, optimizer=opt, metrics=[loss_mse])
history = model.fit_generator(
train_gen,
validation_data=test_gen,
steps_per_epoch=data_loader.steps_per_epoch,
epochs=20,
validation_steps=data_loader.validation_steps,
verbose=2,
use_multiprocessing=True,
callbacks=[checkpoint, callback_save_images, cb_tensorboard]
)
When I tried to run the same procedure with a small data sample (200 records), everything seemed to work fine. On the dataset of 60,000 records, however, the first epoch had not completed after more than 12 hours.
The training is performed on an NVIDIA RTX 2080 Ti.
I would be grateful if anyone could suggest what has to be modified, or configured in general, in order to train the network in a reasonable time.
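Since the input pipeline here is a plain Python generator that reads and augments images with OpenCV and grows each batch with np.append, one way to narrow the problem down is to time the generator in isolation and compare it against the per-step GPU time reported by Keras. A rough sketch (added for illustration; names follow the code above):
import time

# Time a few batches from the training generator alone, with no model involved.
n_batches = 20
gen = train_gen  # generator returned by data_loader.get_training_and_test_generators()
start = time.time()
for _ in range(n_batches):
    x_batch, y_batch = next(gen)
elapsed = time.time() - start
print("avg seconds per batch from generator: %.3f" % (elapsed / n_batches))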

How to make an LSTM/RNN focus more on certain parts of a time series and less on other parts, using TensorFlow?

I have a time-series prediction problem where most of the observed values (95%) are 0s while the remaining values are non-zero. How can I make use of an RNN for this problem?
I want to predict surface flow from environmental data (air temperature, rainfall, humidity, etc.). We know that surface flow is 0.0 for most of the year. However, I also don't want to simply ignore the 0s, because they represent the periods of the year when surface flow is 0.0. The image below shows a possible observed output and three inputs. The three inputs here are just random, but in reality they will be data such as rainfall and humidity, and these inputs have some periodic pattern.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psutil
import tensorflow as tf
import sys
print(sys.version)
print('tensorflow version: ', tf.__version__)
#clean computation graph
tf.reset_default_graph()
tf.set_random_seed(777) # reproducibility
np.random.seed(0)
def MinMaxScaler(data):
numerator = data - np.min(data, 0)
denominator = np.max(data, 0) - np.min(data, 0)
# noise term prevents the zero division
return numerator / (denominator + 1e-7)
class generate_data(object):
def __init__(self, data_len, in_series, y_pred, seq_lengths, method='sum' ):
self.data_len = data_len
self.data = None
self.in_series = in_series #number of input series
self.y_pred = y_pred #number of final outputs from model
self.seq_lengths = seq_lengths
self.method = method
def _f(self, x):
y = 0
result = []
for _ in x:
result.append(y)
y += np.random.normal(scale=1)
return np.array(result)
def _runningMean(self, x, N):
return np.convolve(x, np.ones((N,))/N)[(N-1):]
def sine(self):
DATA = np.zeros((self.data_len, self.in_series))
xx = [None]
data_0 = np.sin(np.linspace(0, 20, self.data_len*self.in_series))
xx = data_0.reshape(self.data_len, self.in_series)
DATA[:,0: self.in_series] = xx
y = self._get_y(DATA)
return xx,y, DATA
def _get_y(self, xx):
if self.method=='sum':
yy = np.array([np.sum(xx[i,:]) for i in range(np.shape(xx)[0])])
elif self.method == 'mean':
yy = np.array([np.mean(xx[i,:]) for i in range(np.shape(xx)[0])])
elif self.method == 'self_mul':
yy = np.array([np.prod(xx[i,:]) for i in range(np.shape(xx)[0])])
elif self.method == 'mean_mirror':
yy = np.array([np.mean(xx[i,:]) for i in range(np.shape(xx)[0])])
return yy
def normalize(self, xx1,yy1):
yy = [None]*len(yy1)
YMinMax = {}
xx = MinMaxScaler(xx1)
for i in range(self.y_pred):
YMinMax['ymin_' + str(i)] = np.min(yy1[0])
YMinMax['ymax_' + str(i)] = np.max(yy1[0])
yy[i] = MinMaxScaler(yy1[0])
setattr(self, 'YMinMax', YMinMax)
return xx,yy
def create_dataset(self, xx, yy, percent_of_zeros):
'''creates a dataset consisting of windows for x and y data'''
dataX = self._build_input_windows(xx, self.seq_lengths)
if self.y_pred > 1:
pass
elif self.y_pred > 1 and self.seq_lengths != any(self.seq_lengths):
pass
else:
dataY = self._build_y_windows(yy[0] , self.seq_lengths)
indices = np.random.choice(np.arange(dataY.size), replace=False,
size=int(dataY.size * percent_of_zeros))
dataY[indices] = 0
return dataX, dataY
def _build_input_windows(self, time_series, seq_length):
dataX = []
for i in range(0, len(time_series) - seq_length):
_x = time_series[i:i + seq_length, :]
dataX.append(_x)
return np.array(dataX)
def _build_y_windows(self, iny, seq_length):
dataY = []
for i in range(0, len(iny) - seq_length):
_y = iny[i + seq_length, ] # Next close price
dataY.append(_y)
return np.array(dataY)
def TrainTestSplit(self, dataX, dataY, train_frac):
train_size = int(len(dataY) * train_frac)
trainX, testX = np.array(dataX[0:train_size]), np.array(dataX[train_size:len(dataX)])
trainY, testY = np.array(dataY[0:train_size]), np.array(dataY[train_size:len(dataY)])
trainY = trainY.reshape(len(trainY), 1)
testY = testY.reshape(len(testY), 1)
return trainX, trainY, testX, testY, train_size
#training/hyper parameters
tot_epochs = 500
batch_size = 16
learning_rate = 0.01
seq_lengths = 5 #sequence lengths/window size for RNN
rnn_inputs = 3 # no of inputs for RNN
y_pred = 1
data_length = 1005 #this can be overwritten or useless
gen_data = generate_data(data_length, rnn_inputs, y_pred, seq_lengths, 'sum')
xx,yy,data_1 = gen_data.sine()
# xx = abs(xx)
train_frac = 0.8
xx1,yy1 = gen_data.normalize(xx,[yy])
zeros = 0.96
dataX, dataY = gen_data.create_dataset(xx1,yy1, zeros)
trainX, trainY, testX, testY, train_size = gen_data.TrainTestSplit( dataX, dataY, train_frac)
keep_prob = tf.placeholder(tf.float32)
x_placeholders = tf.placeholder(tf.float32, [None, 5, 3])
Y = tf.placeholder(tf.float32, [None, 1])
plt.plot(dataY, '.', label='output')
plt.plot(xx[:,0], '.', label='input1')
plt.plot(xx[:,1], '.', label='input2')
plt.plot(xx[:,2], '.', label='input3')
plt.legend()
# build neural network
with tf.variable_scope('scope0'): #defining RNN
# cell = tf.contrib.rnn.BasicLSTMCell(num_units= 7, state_is_tuple=True, activation=tf.tanh)
cell = tf.keras.layers.LSTMCell(units = 128)
outputs1, _states = tf.nn.dynamic_rnn(cell, x_placeholders, dtype=tf.float32)
# Y_pred1 = tf.contrib.layers.fully_connected(outputs1[:, -1], 1, activation_fn=None)
Y_pred1 = tf.keras.layers.Dense(1)(outputs1[:,-1])
Y_pred = Y_pred1
## cost/loss
loss = tf.reduce_sum(tf.square(Y_pred - Y)) # sum of the squares
## optimizer
optimizer = tf.train.AdamOptimizer(learning_rate)
train = optimizer.minimize(loss)
#
## RMSE
targets = tf.placeholder(tf.float32, [None, 1])
predictions = tf.placeholder(tf.float32, [None, 1])
rmse = tf.sqrt(tf.reduce_mean(tf.square(targets - predictions)))
with tf.Session() as sess:
saver = tf.train.Saver(max_to_keep=41)
writer = tf.summary.FileWriter('./laos_2out/cnntest', sess.graph)
init = tf.global_variables_initializer()
sess.run(init)
# Training step
for epoch in range(tot_epochs):
total_batches = int(train_size / batch_size) ##total batches/ no. of steps in an epoch
#for batch in range(total_batches):
_, step_loss = sess.run([train, loss], feed_dict= {x_placeholders:trainX, Y:trainY, keep_prob:0.5} )
print('epoch: # {} loss: {}'.format(epoch, step_loss))
# # evaluating on test data
test_predict = sess.run(Y_pred, feed_dict= {x_placeholders:testX, Y:trainY, keep_prob:0.5} )
#evaluating on training data
train_predict = sess.run(Y_pred, feed_dict={x_placeholders:trainX, Y:trainY, keep_prob:0.5})
rmse_val = sess.run(rmse, feed_dict={targets: testY, predictions: test_predict})
print("RMSE: {}".format(rmse_val))
# Plot predictions
fig, (ax1,ax2) = plt.subplots(1,2, sharey=True)
fig.set_figwidth(14)
fig.set_figheight(5)
ax2.plot(testY, 'b', label='observed')
ax2.plot(test_predict, 'k', label='predicted')
ax2.legend(loc="best")
ax2.set_xlabel("Time Period")
ax2.set_title('Testing')
ax1.plot(trainY, 'b', label='observed')
ax1.plot(train_predict, 'k',label= 'predicted')
ax1.legend(loc="best")
ax1.set_xlabel("Time Period")
ax1.set_ylabel("discharge (cms)")
ax1.set_title('Training')
plt.show()
The problem is that while training, the model focuses on the majority of the values, i.e. the 0s, and thus makes its predictions equal to 0. How can I make the model focus on the non-zero values (positive surface flow) while still taking the 0s (no surface flow) into account? I have read about attention mechanisms but have not understood how I could implement one in such a scenario.
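Not part of the original question, but for illustration: one simple way to make the squared-error loss defined above pay more attention to non-zero targets is to weight each sample's error by a mask derived from the target. A sketch in the same TF 1.x style as the code above; the up-weighting factor 10.0 is an arbitrary assumption:
# Weighted squared-error loss: errors on non-zero targets count more than errors on zeros.
# Y and Y_pred have shape [None, 1], as in the graph defined above.
nonzero_weight = 10.0  # assumed factor; would need tuning
weights = tf.where(tf.greater(Y, 0.0),
                   nonzero_weight * tf.ones_like(Y),
                   tf.ones_like(Y))
weighted_loss = tf.reduce_sum(weights * tf.square(Y_pred - Y))
train_weighted = tf.train.AdamOptimizer(learning_rate).minimize(weighted_loss)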

Cost-sensitive loss function in Tensorflow

I'm doing research on cost-sensitive neural networks based on TensorFlow, but because of TensorFlow's static graph structure, some network structures can't easily be realized.
My loss function (cost), cost matrix, and the computation procedure are described below; my goal is to compute the total cost and then optimize the NN:
Approximate computation procedure:
y_ is the last fully connected output of a CNN and has shape (1024, 5);
y is a tensor of shape (1024) that holds the ground-truth class of x[i];
y_soft[i][j] is the probability that x[i] belongs to class j.
How can I realize this in TensorFlow?
cost_matrix:
[[0,1,100],
[1,0,1],
[1,20,0]]
label:
[1,2]
y*:
[[0,1,0],
[0,0,1]]
y(prediction):
[[0.2,0.3,0.5],
[0.1,0.2,0.7]]
label,cost_matrix-->cost_embedding:
[[1,0,1],
[1,20,0]]
Obviously, the 0.3 in [0.2,0.3,0.5] corresponds to the correct-label probability for [0,1,0], so it should not contribute to the loss.
The 0.7 in [0.1,0.2,0.7] is the same. In other words, the positions with value 1 in y* do not contribute to the loss.
So I have (1-y*):
[[1,0,1],
[1,1,0]]
Then the cross-entropy is target*log(predict) + (1-target)*log(1-predict); for the positions where y* is 0, the (1-target)*log(1-predict) term applies, so I use (1-predict), written (1-y).
1-y:
[[0.8,*0.7*,0.5],
[0.9,0.8,*0.3*]]
(the emphasized numbers are unused)
the custom loss is
[[1,0,1], [1,20,0]] * log([[0.8,0.7,0.5],[0.9,0.8,0.3]]) *
[[1,0,1],[1,1,0]]
and you can see that the (1-y*) factor can be dropped here,
so the loss is -tf.reduce_mean(cost_embedding * log(1-y));
to make it numerically safe, it should be:
-tf.reduce_mean(cost_embedding * log(tf.clip_by_value(1-y, 1e-10, 1.0)))
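To make the arithmetic above concrete, here is a small NumPy sketch of the same computation with the example matrices (added for illustration only):
import numpy as np

cost_matrix = np.array([[0, 1, 100],
                        [1, 0, 1],
                        [1, 20, 0]], dtype=np.float32)
labels = np.array([1, 2])                        # ground-truth classes
y_pred = np.array([[0.2, 0.3, 0.5],
                   [0.1, 0.2, 0.7]], dtype=np.float32)

cost_embedding = cost_matrix[labels]             # [[1,0,1],[1,20,0]], as in the post
one_minus_y = np.clip(1.0 - y_pred, 1e-10, 1.0)  # avoid log(0)
# The correct-label positions carry cost 0 (the cost matrix diagonal is 0),
# which is why the (1-y*) factor can be dropped.
loss = -np.mean(cost_embedding * np.log(one_minus_y))
print(cost_embedding)
print(loss)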
The demo is below:
import tensorflow as tf
import numpy as np

hidden_units = 50
num_class = 3

class Model():
    def __init__(self, name_scope, is_custom):
        self.name_scope = name_scope
        self.is_custom = is_custom
        self.input_x = tf.placeholder(tf.float32, [None, hidden_units])
        self.input_y = tf.placeholder(tf.int32, [None])
        self.instantiate_weights()
        self.logits = self.inference()
        self.predictions = tf.argmax(self.logits, axis=1)
        self.losses, self.train_op = self.opitmizer()

    def instantiate_weights(self):
        with tf.variable_scope(self.name_scope + 'FC'):
            self.W = tf.get_variable('W', [hidden_units, num_class])
            self.b = tf.get_variable('b', [num_class])
            self.cost_matrix = tf.constant(
                np.array([[0, 1, 100], [1, 0, 100], [20, 5, 0]]),
                dtype=tf.float32
            )

    def inference(self):
        return tf.matmul(self.input_x, self.W) + self.b

    def opitmizer(self):
        if not self.is_custom:
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.input_y, logits=self.logits)
        else:
            batch_cost_matrix = tf.nn.embedding_lookup(
                self.cost_matrix, self.input_y
            )
            loss = - tf.log(1 - tf.nn.softmax(self.logits)) * batch_cost_matrix
        train_op = tf.train.AdamOptimizer().minimize(loss)
        return loss, train_op

import random

batch_size = 128
norm_model = Model('norm', False)
custom_model = Model('cost', True)
split_point = int(0.9 * dataset_size)
train_set = datasets[:split_point]
test_set = datasets[split_point:]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(100):
        batch_index = random.sample(range(split_point), batch_size)
        train_batch = train_set[batch_index]
        train_labels = lables[batch_index]
        _, eval_predict, eval_loss = sess.run(
            [norm_model.train_op, norm_model.predictions, norm_model.losses],
            feed_dict={
                norm_model.input_x: train_batch,
                norm_model.input_y: train_labels
            })
        _, eval_predict1, eval_loss1 = sess.run(
            [custom_model.train_op, custom_model.predictions, custom_model.losses],
            feed_dict={
                custom_model.input_x: train_batch,
                custom_model.input_y: train_labels
            })
        # print 'norm',eval_predict,'\ncustom',eval_predict1
        print np.sum(((eval_predict == train_labels) == True).astype(np.int)), \
            np.sum(((eval_predict1 == train_labels) == True).astype(np.int))
        if i % 10 == 0:
            print 'norm_test', sess.run(norm_model.predictions,
                feed_dict={
                    norm_model.input_x: test_set,
                    norm_model.input_y: lables[split_point:]
                })
            print 'custom_test', sess.run(custom_model.predictions,
                feed_dict={
                    custom_model.input_x: test_set,
                    custom_model.input_y: lables[split_point:]
                })

Using TensorFlow to train embeddings, loss increases at the start of each epoch, then decreases

I wrote some simple code to do something like word2vec, but while training I see the cross-entropy loss increase at the beginning of each epoch and then decrease. Please help me find out if there is any mistake in my code; I have already reviewed it many times...
from tensorflow.python import debug as tf_debug
import math
import os
import time
import random
import numpy as np
import tensorflow as tf
def gen_next_block(filenames,epochs):
for epoch in range(epochs):
for filename in filenames:
with open(filename) as f:
start = time.time()
line_cnt = 0
data = []
for line in f:
record = line.strip().split(',')
record = [int(record[0]), int(record[1]), float(record[2])] + [int(item) for item in record[3].split(';')]
record = record[:3] + [record[3 + epoch],]
data.append(record)
line_cnt += 1
if line_cnt % 4096000 == 0:
end = time.time()
elapsed_time = (end - start) * 1000
print("load block data: epoch %d, filename %s line_cnt %d, size %d, elapsed time %f ms" % (epoch, filename, line_cnt, len(data), elapsed_time))
random.shuffle(data)
yield data
data = []
start = time.time()
if len(data) > 0:
end = time.time()
elapsed_time = (end - start) * 1000
print("load block data: epoch %d, filename %s line_cnt %d, size %d, elapsed time %f ms" % (epoch, filename, line_cnt, len(data), elapsed_time))
random.shuffle(data)
yield data
data = None
next_block_generator = None
data_index = 0
last_time_data_index = 0
def generate_batch(filenames, epochs, batch_size):
global data
global data_index
global last_time_data_index
global next_block_generator
if next_block_generator is None:
next_block_generator = gen_next_block(filenames,epochs)
if data_index <= last_time_data_index:
data = next(next_block_generator,None)
data_index = 0
last_time_data_index = 0
if data is not None:
last_time_data_index = data_index
batch = np.ndarray(shape=(batch_size), dtype=np.int32)
labels = np.ndarray(shape=(batch_size), dtype=np.int32)
negative_labels = np.ndarray(shape=(batch_size), dtype=np.int32)
weights = np.ndarray(shape=(batch_size), dtype=np.float32)
negative_weights = np.ones(shape=(batch_size), dtype=np.float32)
for i in range(batch_size):
batch[i] = data[data_index][0]
labels[i] = data[data_index][1]
weights[i] = data[data_index][2]
negative_labels[i] = data[data_index][3]
data_index = (data_index + 1) % len(data)
return batch, labels, negative_labels, weights, negative_weights
else:
raise Exception("finish load file list [%s] %d times" % (','.join(filenames),epochs))
filename = 'data/dr_xianyu_item2vec_train_with_meta_20170725_dir/dr_xianyu_item2vec_train_with_meta_20170725_dir_'
filenames = [filename + str(i) for i in range(10)]
epochs = 5
batch_size = 2048
embedding_size = 32 # Dimension of the embedding vector.
num_sampled = batch_size # Number of negative examples to sample.
vocabulary_size = 7483025 + 1
graph = tf.Graph()
with graph.as_default():
with tf.device('/cpu:0'):
with tf.name_scope('input_data'):
train_inputs = tf.placeholder(tf.int32, shape=[batch_size], name = 'context_placeholder')
positive_labels = tf.placeholder(tf.int32, shape=[batch_size], name = 'target_placeholder')
negative_labels = tf.placeholder(tf.int32, shape=[num_sampled], name = 'negative_target_placeholder')
positive_weights = tf.placeholder(tf.float32, shape=([batch_size]), name = 'target_weight')
negative_weights = tf.placeholder(tf.float32, shape=([num_sampled]), name = 'negative_target_weight')
with tf.name_scope('emb_layer'):
embeddings = tf.Variable(
tf.random_uniform([vocabulary_size, embedding_size], -0.5/embedding_size, 0.5/embedding_size), name = 'emb')
embed = tf.nn.embedding_lookup(embeddings, train_inputs)
with tf.name_scope("neg_layer"):
nce_weights = tf.Variable(tf.random_uniform([vocabulary_size, embedding_size], -0.5/embedding_size, 0.5/embedding_size), name = 'nce_weight')
nce_biases = tf.Variable(tf.zeros([vocabulary_size]), name = 'nce_biase')
positive_embed = tf.nn.embedding_lookup(nce_weights,positive_labels)
positive_bias = tf.nn.embedding_lookup(nce_biases,positive_labels)
negative_embed = tf.nn.embedding_lookup(nce_weights,negative_labels)
negative_bias = tf.nn.embedding_lookup(nce_biases,negative_labels)
positive_logits = tf.reduce_sum(tf.multiply(embed,positive_embed),1) + positive_bias
negative_logits = tf.reduce_sum(tf.multiply(embed,negative_embed),1) + negative_bias
with tf.name_scope('loss_layer'):
positive_xent = tf.nn.sigmoid_cross_entropy_with_logits(labels = tf.ones_like(positive_logits), logits = positive_logits)
negative_xent = tf.nn.sigmoid_cross_entropy_with_logits(labels = tf.zeros_like(negative_logits), logits = negative_logits)
weighted_positive_logits = tf.multiply(positive_logits,positive_weights)
weighted_negative_logits = tf.multiply(negative_logits,negative_weights)
loss = (tf.reduce_sum(positive_xent) + tf.reduce_sum(negative_xent)) /(batch_size*2)
with tf.name_scope('train'):
optimizer = tf.train.RMSPropOptimizer(0.001).minimize(loss)
# global_step = tf.Variable(0, trainable=False)
# starter_learning_rate = 0.1
# learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, 20000, 0.8, staircase=True)
# optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss,global_step=global_step)
init = tf.global_variables_initializer()
init_local = tf.local_variables_initializer()
tf.summary.scalar('loss_layer/loss', loss)
for var in tf.trainable_variables():
tf.summary.histogram(var.op.name, var)
summary_op = tf.summary.merge_all()
saver = tf.train.Saver()
with tf.Session(graph=graph) as sess:
# sess = tf_debug.LocalCLIDebugWrapperSession(sess)
# sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
for v in tf.global_variables():
print(v.name,v.device,v.shape)
for v in tf.local_variables():
print(v.name,v.device,v.shape)
# if os.path.exists('tmp/model.ckpt.meta'):
# saver = tf.train.import_meta_graph('tmp/model.ckpt.meta')
# saver.restore(sess,tf.train.latest_checkpoint('tmp/'))
# print("model restored")
# else:
if True:
init.run()
init_local.run()
print("model init")
summary_writer = tf.summary.FileWriter('tmp/log', sess.graph)
average_loss = 0
start = time.time()
step = 1
try:
while True:
batch_inputs, batch_labels, batch_negative_labels, positive_weights_np, negative_weights_np = generate_batch(filenames, epochs,batch_size)
feed_dict = {train_inputs: batch_inputs, positive_labels: batch_labels, negative_labels: batch_negative_labels, positive_weights:positive_weights_np, negative_weights:negative_weights_np}
if step%1000 == 0:
loss_val,summary_str,_ = sess.run([loss, summary_op, optimizer], feed_dict=feed_dict)
summary_writer.add_summary(summary_str,step)
else:
loss_val,_ = sess.run([loss, optimizer], feed_dict=feed_dict)
average_loss += loss_val
if step % 1000 == 0:
average_loss /= 1000
end = time.time()
elapsed_time = (end - start)*1000 / 1000
print('Average loss at step ', step, ': ', average_loss, 'time cost', elapsed_time, 'ms')
average_loss = 0
start = time.time()
if step % 20000 == 0:
print('save model...')
save_path = saver.save(sess,'tmp/model.ckpt')
print("saved model in",save_path)
step +=1
except Exception,e:
print e
print("total batch count %d" % step)
summary_writer.flush()
Here is my loss:
the first picture is from SGD over 5 epochs;
the second picture is from RMSProp over 2 epochs (still running).

How can I visualize embeddings on TensorBoard? (Not MNIST data)

I'm trying to create an embedding visualization on TensorBoard. I'm using CSV data, not MNIST data; the data in the CSV looks like the following:
0.266782506,"1,0"
0.361942522,"0,1"
0.862076491,"0,1"
The value in the first column, such as 0.266782506, is the sample input x, and "0,1" is the one-hot label y.
I tried to find references on how to create a visualized graph with the embedding projector on TensorBoard, but I found examples only using MNIST data, so I'm looking for help if anyone can give guidance on how to create a visualized embedding graph on TensorBoard with data like mine.
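For context, the projector workflow itself doesn't depend on MNIST; it only needs an embedding variable, an optional metadata file, and a projector config written next to the summary logs. A rough sketch of that part (my own illustration using the tf.contrib projector API already imported in the code below; embedding_var, LOG_DIR and metadata.tsv are placeholders):
import os
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector

LOG_DIR = 'D:\\Tensorlogs\\logs'  # same log dir the FileWriter uses
embedding_var = tf.Variable(tf.random_normal([100, 2]), name='my_embedding')  # placeholder data

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter(LOG_DIR, sess.graph)

    config = projector.ProjectorConfig()
    embedding = config.embeddings.add()
    embedding.tensor_name = embedding_var.name
    # optional: a TSV file with one label per row of the embedding variable (only if the file exists)
    embedding.metadata_path = os.path.join(LOG_DIR, 'metadata.tsv')
    projector.visualize_embeddings(writer, config)

    # the projector reads the variable from a checkpoint saved in LOG_DIR
    saver = tf.train.Saver([embedding_var])
    saver.save(sess, os.path.join(LOG_DIR, 'embedding.ckpt'))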
I can get SCALAR, GRAPH and HISTOGRAM visualizations on TensorBoard with my code as follows:
# coding=utf-8
import tensorflow as tf
import numpy
import os
import csv
import shutil
from tensorflow.contrib.tensorboard.plugins import projector

# Reading data from csv:
filename = open('D:\Program Files (x86)\logistic\sample_1.csv', 'r')
reader = csv.reader(filename)
t_X, t_Y, c = [], [], []
a, b = 0, 0
for i in reader:
    t_X.append(i[0])
    a = int(i[1][0])
    b = int(i[1][2])
    c = list([a, b])
    t_Y.extend([c])
t_X = numpy.asarray(t_X)
t_Y = numpy.asarray(t_Y)
t_XT = numpy.transpose([t_X])
filename.close()

# Parameters
learning_rate = 0.01
training_epochs = 5
batch_size = 50
display_step = 1
n_samples = t_X.shape[0]

sess = tf.InteractiveSession()
with tf.name_scope('Input'):
    with tf.name_scope('x_input'):
        x = tf.placeholder(tf.float32, [None, 1], name='x_input')
    with tf.name_scope('y_input'):
        y = tf.placeholder(tf.float32, [None, 2], name='y_input')

# Weight
with tf.name_scope('layer1'):
    with tf.name_scope('weight'):
        W = tf.Variable(tf.random_normal([1, 2], dtype=tf.float32), name='weight')
    with tf.name_scope('bias'):
        b = tf.Variable(tf.random_normal([2], dtype=tf.float32), name='bias')

# model
with tf.name_scope('Model'):
    with tf.name_scope('pred'):
        pred = tf.nn.softmax(tf.matmul(x, W) + b, name='pred')
    with tf.name_scope('cost'):
        cost = tf.reduce_mean(-tf.reduce_sum(y * tf.log(pred), reduction_indices=1), name='cost')
        tf.summary.scalar('cost', cost)
        tf.summary.histogram('cost', cost)
    with tf.name_scope('optimizer'):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# Calculate accuracy
with tf.name_scope('accuracy_count'):
    with tf.name_scope('correct_prediction'):
        correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    with tf.name_scope('accuracy'):
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar('accuracy', accuracy)
        tf.summary.histogram('accuracy', accuracy)

init = tf.global_variables_initializer()
merged = tf.summary.merge_all()
sess.run(init)
writer = tf.summary.FileWriter('D:\Tensorlogs\logs', sess.graph)

for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = int(n_samples / batch_size)
    i = 0
    for anc in range(total_batch):
        m, n = [], []
        m = t_X[i:i + batch_size]
        n = t_Y[i:i + batch_size]
        m = numpy.asarray(m)
        n = numpy.asarray(n)
        m = numpy.transpose([m])
        summary, predr, o, c = sess.run([merged, pred, optimizer, cost], feed_dict={x: m, y: n})
        avg_cost += c / total_batch
        i = i + batch_size
    writer.add_summary(summary, epoch + 1)
    if (epoch + 1) % display_step == 0:
        print("Epoch:", '%04d' % (epoch + 1), "cost=", avg_cost, "W=", wr, "b=", br, "accuracy_s=", accuracy_s.eval(feed_dict={x: t_XT, y: t_Y}))
print("Optimization Finished!")
Thank you very much!