Understanding model loss/accuracy and how not to leak information - tensorflow

This question is related to the starting one posted here.
The problem is to classify rows so that the classification of row number i can rely on the data for all the previous rows, including their class membership. The linked post contains an answer, which is posted below.
For the sake of experimentation I've used a set of randomly crafted data, where the classifying property is a 0,1 uniform random variable.
What strikes me is that the loss of the model in the above example is really low and the accuracy is 99% whereas I would expect something in the 50% range.
So I am assuming that the way the model is testing the classification is leaking information somehow.
Does anybody happen to see what's the issue? What would be the proper way to evaluate the accuracy in such scenario?
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from random import randint
# Build a synthetic data set whose label ('Property') is drawn uniformly from
# {0, 1} independently of the features.
SIZE = 100
df = pd.DataFrame({'Temperature': list(range(SIZE)),
                   'Weight': [randint(1, 100) for _ in range(SIZE)],
                   'Size': [randint(1, 10000) for _ in range(SIZE)],
                   'Property': [randint(0, 1) for _ in range(SIZE)]})
# Shift the label up by one row: row i is now paired with the label of row i+1.
df.Property = df.Property.shift(-1)
print(df.head())

# parameters
time_steps = 1
inputs = 3
outputs = 2

# The shift leaves NaN in the last label; drop that row.
df = df.iloc[:-1, :]
df = df.values
train_X = df[:, :-1]
train_y = df[:, -1]

# Scale every feature to [0, 1] and insert a time axis of length 1:
# (rows, features) -> (rows, 1, features).
scaler = MinMaxScaler(feature_range=(0, 1))
train_X = scaler.fit_transform(train_X)
train_X = train_X[:, None, :]

# One-hot encode the labels.
onehot_encoder = OneHotEncoder()
encode_categorical = train_y.reshape(len(train_y), 1)
train_y = onehot_encoder.fit_transform(encode_categorical).toarray()

# model parameters
learning_rate = 0.001
epochs = 50000
batch_size = int(train_X.shape[0] / 2)
length = train_X.shape[0]
display = 100
neurons = 100

tf.reset_default_graph()
X = tf.placeholder(tf.float32, [None, time_steps, inputs])
y = tf.placeholder(tf.float32, [None, outputs])

# LSTM cell followed by a dense layer producing one logit per class.
cell = tf.contrib.rnn.BasicLSTMCell(num_units=neurons, activation=tf.nn.relu)
cell_outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
stacked_outputs = tf.reshape(cell_outputs, [-1, neurons])
out = tf.layers.dense(inputs=stacked_outputs, units=outputs)

loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
    labels=y, logits=out))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)

# Each tf.metrics.* call returns a (value, update_op) pair; index [1] is the
# update op, whose result is what gets printed below.
true_class = tf.argmax(y, 1)
predicted_class = tf.argmax(out, 1)
accuracy = tf.metrics.accuracy(labels=true_class,
                               predictions=predicted_class,
                               name="accuracy")
precision = tf.metrics.precision(labels=true_class,
                                 predictions=predicted_class,
                                 name="precision")
recall = tf.metrics.recall(labels=true_class,
                           predictions=predicted_class,
                           name="recall")
# F1 is the harmonic mean of precision and recall (the numerator previously
# used accuracy by mistake).
f1 = 2 * precision[1] * recall[1] / (precision[1] + recall[1])

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    # tf.metrics keep their counters in local variables.
    tf.local_variables_initializer().run()

    for steps in range(epochs):
        mini_batch = zip(range(0, length, batch_size),
                         range(batch_size, length + 1, batch_size))
        for (start, end) in mini_batch:
            sess.run(training_op, feed_dict={X: train_X[start:end, :, :],
                                             y: train_y[start:end, :]})
        if (steps + 1) % display == 0:
            loss_fn = loss.eval(feed_dict={X: train_X, y: train_y})
            print('Step: {} \tTraining loss: {}'.format((steps + 1), loss_fn))

    # NOTE: the metrics below are computed on the very rows the model was
    # trained on, so they measure how well the network memorised them, not how
    # well it predicts unseen rows.
    # Results get their own names so the graph tensors are not overwritten.
    acc, prec, rec, f1_score = sess.run([accuracy, precision, recall, f1],
                                        feed_dict={X: train_X, y: train_y})
    print('\nEvaluation on training set')
    print('Accuracy:', acc[1])
    print('Precision:', prec[1])
    print('Recall:', rec[1])
    print('F1 score:', f1_score)

Related

How to print out prediction value in tensorflow

I am new to TensorFlow and I am a slow learner. After successfully compiling the model and getting the accuracy, I want to print the prediction variable, but I don't know how to do it.
My dataset has multivariate features with only one output. The output contains only 1, 0 and -1, so I used a one-hot encoder for the output. I finished compiling the model and searched online for how to compute predictions in TensorFlow; however, I didn't find a good solution for my question.
The precisionCalculate function computes the precision on each column of the test data, since train_y and test_y become [1,0,0], [0,1,0], [0,0,1] after one-hot encoding.
I have tried
y_pred = sess.run(tf.argmax(y, 1), feed_dict={X: test_x, y: test_y})
but it turns out y_pred is exactly the same as my test_y
Here is my full code example.
import tensorflow as tf
import pandas as pd
import numpy as np
import tensorflow.contrib.rnn
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, LabelEncoder
import pdb
# Raise NumPy's summarisation threshold to infinity so printed arrays are never
# abbreviated with "...".
np.set_printoptions(threshold=np.inf)
def precisionCalculate(pred_y, test_y):
    """Return the precision of the class-0 and class-2 predictions.

    Args:
        pred_y: 1-D array-like of predicted class indices.
        test_y: 1-D array-like of true class indices, same length as pred_y.

    Returns:
        Tuple ``(precision1, precision3)``: the fraction of class-0
        predictions that are truly class 0, and the same for class 2. A class
        that was never predicted yields 0.0 instead of raising
        ZeroDivisionError.
    """
    pred_y = np.asarray(pred_y)
    test_y = np.asarray(test_y)

    def _precision(label):
        # Precision = correct predictions of `label` / all predictions of it.
        predicted = pred_y == label
        n_predicted = int(np.count_nonzero(predicted))
        if n_predicted == 0:
            return 0.0
        n_correct = int(np.count_nonzero(predicted & (test_y == label)))
        return n_correct / n_predicted

    # The leftover pdb.set_trace() breakpoint was removed: it stopped the
    # program on every call.
    return _precision(0), _precision(2)
# Load the table, skipping the first file row, and drop the first two columns.
df = pd.read_csv('new_df.csv', skiprows=[0], header=None)
df.drop(columns=[0, 1], inplace=True)
# Relabel the remaining columns 0..n-1 so column 0 is the label column.
df.columns = np.arange(0, df.shape[1])
# Pair each row's features with the label of the NEXT row.
df[0] = df[0].shift(-1)

#parameters
time_steps = 1
inputs = df.shape[1] - 1  # every column except the label in column 0
outputs = 3

#remove nan as a result of shift values
df = df.iloc[:-1, :]
#convert to numpy
df = df.values

train_number = 30276  #start date from 1018
train_x = df[:train_number, 1:]
test_x = df[train_number:, 1:]
train_y = df[:train_number, 0]
test_y = df[train_number:, 0]

#data pre-processing
#scale
# Fit the scaler on the training rows only and reuse it for the test rows;
# re-fitting on the test set would scale the two sets inconsistently.
scaler = MinMaxScaler(feature_range=(0, 1))
train_x = scaler.fit_transform(train_x)
test_x = scaler.transform(test_x)

#reshape into 3d array
train_x = train_x[:, None, :]
test_x = test_x[:, None, :]

#one-hot encode the outputs
onehot_encoder = OneHotEncoder()
# Remap the labels to max - label so the largest label becomes 0. The same
# training maximum is used for both sets so the mapping is identical.
max_ = train_y.max()
train_y = (train_y - max_) * (-1)
test_y = (test_y - max_) * (-1)
encode_categorical = train_y.reshape(len(train_y), 1)
encode_categorical2 = test_y.reshape(len(test_y), 1)
# Fit the encoder on the training labels only, then reuse it for the test set.
train_y = onehot_encoder.fit_transform(encode_categorical).toarray()
test_y = onehot_encoder.transform(encode_categorical2).toarray()
print(train_x.shape, train_y.shape, test_x.shape, test_y.shape)

#model parameters
learning_rate = 0.001
epochs = 100
batch_size = int(train_x.shape[0] / 10)
length = train_x.shape[0]
display = 100
neurons = 100

tf.reset_default_graph()
# The feature dimension follows the data instead of a hard-coded 90.
X = tf.placeholder(tf.float32, [None, time_steps, inputs], name='x')
y = tf.placeholder(tf.float32, [None, outputs], name='y')

#LSTM cell
cell = tf.contrib.rnn.BasicLSTMCell(num_units=neurons, activation=tf.nn.relu)
cell_outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)

# pass into Dense layer
stacked_outputs = tf.reshape(cell_outputs, [-1, neurons])
out = tf.layers.dense(inputs=stacked_outputs, units=outputs)

# softmax cross-entropy loss for classification
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=out, labels=y))

# optimizer to minimize cost
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)

# Predicted class = argmax of the model output (logits); true class = argmax
# of the one-hot labels.
true_class = tf.argmax(y, 1)
predicted_class = tf.argmax(out, 1)

# Each tf.metrics.* call returns a (value, update_op) pair.
accuracy = tf.metrics.accuracy(labels=true_class, predictions=predicted_class, name="accuracy")
precision = tf.metrics.precision(labels=true_class, predictions=predicted_class, name="precision")
recall = tf.metrics.recall(labels=true_class, predictions=predicted_class, name="recall")
# F1 is the harmonic mean of precision and recall (the numerator previously
# used accuracy by mistake).
f1 = 2 * precision[1] * recall[1] / (precision[1] + recall[1])

with tf.Session() as sess:
    # initialize all variables
    tf.global_variables_initializer().run()
    tf.local_variables_initializer().run()

    # Train the model
    for steps in range(epochs):
        mini_batch = zip(range(0, length, batch_size), range(batch_size, length + 1, batch_size))
        # train data in mini-batches
        for (start, end) in mini_batch:
            sess.run(training_op, feed_dict={X: train_x[start:end, :, :], y: train_y[start:end, :]})
        # print training performance
        if (steps + 1) % display == 0:
            # evaluate loss function on training set
            loss_fn = loss.eval(feed_dict={X: train_x, y: train_y})
            print('Step: {} \tTraining loss: {}'.format((steps + 1), loss_fn))

    # evaluate model accuracy
    # Results get their own names so the graph tensors are not overwritten.
    acc, prec, rec, f1_score = sess.run([accuracy, precision, recall, f1],
                                        feed_dict={X: test_x, y: test_y})

    # Predictions come from the model output on the TEST inputs. Taking the
    # argmax of the label placeholder `y` just echoes the labels back.
    y_pred = sess.run(predicted_class, feed_dict={X: test_x})
    test_y_alter = np.argmax(test_y, axis=1)
    print(precisionCalculate(y_pred, test_y_alter))
    print(y_pred)

    print('\nEvaluation on test set')
    print('Accuracy:', acc[1])
    print('Precision:', prec[1])
    print('Recall:', rec[1])
    print('F1 score:', f1_score)
I think you should use the output of your model instead of the label (y) in tf.argmax.
Here is my code in order to print prediction of the model:
# tf.Print passes its first argument through unchanged and logs the listed
# tensors whenever the op is evaluated. The message string was missing its
# closing quote.
pred_y = tf.Print(tf.argmax(score, 1), [tf.argmax(score, 1)], message="prediction:")
pred_y.eval()
In the above code, score means the probability output of your model.

How to make lstm/rnn focus more on certain parts of time series while less on other parts using tensorflow?

I have a time series prediction problem where most of the observed values (95%) are 0s while remaining values are non-zeros. How can I make use of RNN for this problem.
I want to predict surface flow from environmental data (air temperature, rainfall, humidity etc.). We know surface flow is 0.0 for most of the time in a year. However, I also don't want to simply ignore the 0s, as they represent the period of the year when surface flow is 0.0. The image below shows a possible observed output and three inputs. The three inputs here are just random, but in reality they will be data like rainfall, humidity etc., and these input data have some periodic pattern.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psutil
import tensorflow as tf
import sys
# Print the interpreter and TensorFlow versions this run uses.
print(sys.version)
print('tensorflow version: ', tf.__version__)
#clean computation graph
# The graph is reset BEFORE the graph-level seed is set, so the seed applies
# to the fresh default graph.
tf.reset_default_graph()
tf.set_random_seed(777) # reproducibility
# Seed NumPy's global generator as well.
np.random.seed(0)
def MinMaxScaler(data):
    """Min-max scale *data* along axis 0.

    Args:
        data: array whose minimum and maximum are taken along axis 0.

    Returns:
        ``(data - min) / (max - min + 1e-7)``. Values fall in [0, 1) because
        of the small constant in the denominator.
    """
    # Compute the minimum once and reuse it for numerator and denominator.
    lowest = np.min(data, 0)
    span = np.max(data, 0) - lowest
    # noise term prevents the zero division
    return (data - lowest) / (span + 1e-7)
class generate_data(object):
    """Build synthetic input series and cut them into windows for an RNN.

    Args:
        data_len: number of rows (time steps) to generate.
        in_series: number of input series (columns).
        y_pred: number of final outputs from the model.
        seq_lengths: window length used when building the dataset.
        method: how the target is derived from each row of inputs; one of
            'sum', 'mean', 'self_mul' or 'mean_mirror'.
    """

    def __init__(self, data_len, in_series, y_pred, seq_lengths, method='sum'):
        self.data_len = data_len
        self.data = None
        self.in_series = in_series  # number of input series
        self.y_pred = y_pred  # number of final outputs from model
        self.seq_lengths = seq_lengths
        self.method = method

    def _f(self, x):
        """Return a random walk with one value per element of *x*, starting at 0."""
        y = 0
        result = []
        for _ in x:
            result.append(y)
            y += np.random.normal(scale=1)
        return np.array(result)

    def _runningMean(self, x, N):
        """Return the full convolution of *x* with an N-wide box filter, minus its first N-1 values."""
        return np.convolve(x, np.ones((N,)) / N)[(N - 1):]

    def sine(self):
        """Generate sine-wave inputs and their target.

        Returns:
            Tuple ``(xx, y, DATA)``: ``xx`` is one sine curve over [0, 20]
            reshaped to ``(data_len, in_series)``, ``y`` is the target built
            by ``_get_y`` and ``DATA`` is a separate array holding a copy of
            ``xx``.
        """
        wave = np.sin(np.linspace(0, 20, self.data_len * self.in_series))
        xx = wave.reshape(self.data_len, self.in_series)
        DATA = np.zeros((self.data_len, self.in_series))
        DATA[:, 0:self.in_series] = xx
        y = self._get_y(DATA)
        return xx, y, DATA

    def _get_y(self, xx):
        """Reduce every row of *xx* to one target value according to ``self.method``.

        Raises:
            ValueError: if ``self.method`` is not a supported name (this used
                to fail with an unbound local variable).
        """
        # 'mean_mirror' is computed exactly like 'mean', as before.
        reducers = {
            'sum': np.sum,
            'mean': np.mean,
            'self_mul': np.prod,
            'mean_mirror': np.mean,
        }
        reduce_row = reducers.get(self.method)
        if reduce_row is None:
            raise ValueError('unknown method: {!r}'.format(self.method))
        return np.array([reduce_row(row) for row in xx])

    def normalize(self, xx1, yy1):
        """Min-max scale the inputs and each of the ``y_pred`` targets.

        Args:
            xx1: input array, scaled with the module-level MinMaxScaler.
            yy1: sequence of target arrays, one per output.

        Returns:
            Tuple ``(xx, yy)`` of the scaled inputs and the list of scaled
            targets. The min and max of each target are stored on
            ``self.YMinMax`` under the keys 'ymin_<i>' and 'ymax_<i>'.
        """
        yy = [None] * len(yy1)
        YMinMax = {}
        xx = MinMaxScaler(xx1)
        for i in range(self.y_pred):
            # Use the i-th target; this previously always read yy1[0].
            YMinMax['ymin_' + str(i)] = np.min(yy1[i])
            YMinMax['ymax_' + str(i)] = np.max(yy1[i])
            yy[i] = MinMaxScaler(yy1[i])
        setattr(self, 'YMinMax', YMinMax)
        return xx, yy

    def create_dataset(self, xx, yy, percent_of_zeros):
        '''creates a dataset consisting of windows for x and y data

        A fraction ``percent_of_zeros`` of the target values, chosen at random
        without replacement, is overwritten with 0.

        Raises:
            NotImplementedError: if ``self.y_pred > 1``; only one output is
                supported (this used to fail with an unbound local variable).
        '''
        if self.y_pred > 1:
            raise NotImplementedError('create_dataset supports a single output only')
        dataX = self._build_input_windows(xx, self.seq_lengths)
        dataY = self._build_y_windows(yy[0], self.seq_lengths)
        indices = np.random.choice(np.arange(dataY.size), replace=False,
                                   size=int(dataY.size * percent_of_zeros))
        dataY[indices] = 0
        return dataX, dataY

    def _build_input_windows(self, time_series, seq_length):
        """Return every window of *seq_length* consecutive rows that has a row following it."""
        return np.array([time_series[i:i + seq_length, :]
                         for i in range(0, len(time_series) - seq_length)])

    def _build_y_windows(self, iny, seq_length):
        """Return, for each input window, the target value right after that window."""
        return np.array([iny[i + seq_length]
                         for i in range(0, len(iny) - seq_length)])

    def TrainTestSplit(self, dataX, dataY, train_frac):
        """Split the windows in order into a leading train part and a trailing test part.

        Returns:
            Tuple ``(trainX, trainY, testX, testY, train_size)``; the Y arrays
            are reshaped to a single column.
        """
        train_size = int(len(dataY) * train_frac)
        trainX, testX = np.array(dataX[0:train_size]), np.array(dataX[train_size:len(dataX)])
        trainY, testY = np.array(dataY[0:train_size]), np.array(dataY[train_size:len(dataY)])
        trainY = trainY.reshape(len(trainY), 1)
        testY = testY.reshape(len(testY), 1)
        return trainX, trainY, testX, testY, train_size
#training/hyper parameters
tot_epochs = 500
batch_size = 16
learning_rate = 0.01
seq_lengths = 5  #sequence lengths/window size for RNN
rnn_inputs = 3  # no of inputs for RNN
y_pred = 1
data_length = 1005  #this can be overwritten or useless

# Generate the sine inputs, scale them, window them and zero out most targets.
gen_data = generate_data(data_length, rnn_inputs, y_pred, seq_lengths, 'sum')
xx, yy, data_1 = gen_data.sine()
# xx = abs(xx)
train_frac = 0.8
xx1, yy1 = gen_data.normalize(xx, [yy])
zeros = 0.96  # fraction of target values overwritten with 0
dataX, dataY = gen_data.create_dataset(xx1, yy1, zeros)
trainX, trainY, testX, testY, train_size = gen_data.TrainTestSplit(dataX, dataY, train_frac)

# keep_prob is fed below but no op in this graph consumes it.
keep_prob = tf.placeholder(tf.float32)
# The input shape follows the hyper parameters instead of hard-coding 5 and 3.
x_placeholders = tf.placeholder(tf.float32, [None, seq_lengths, rnn_inputs])
Y = tf.placeholder(tf.float32, [None, 1])

plt.plot(dataY, '.', label='output')
plt.plot(xx[:, 0], '.', label='input1')
plt.plot(xx[:, 1], '.', label='input2')
plt.plot(xx[:, 2], '.', label='input3')
plt.legend()

# build neural network
with tf.variable_scope('scope0'):  #defining RNN
    # cell = tf.contrib.rnn.BasicLSTMCell(num_units= 7, state_is_tuple=True, activation=tf.tanh)
    cell = tf.keras.layers.LSTMCell(units=128)
    outputs1, _states = tf.nn.dynamic_rnn(cell, x_placeholders, dtype=tf.float32)
    # Y_pred1 = tf.contrib.layers.fully_connected(outputs1[:, -1], 1, activation_fn=None)
    # Only the output of the last time step feeds the dense layer.
    Y_pred1 = tf.keras.layers.Dense(1)(outputs1[:, -1])
    Y_pred = Y_pred1

## cost/loss
loss = tf.reduce_sum(tf.square(Y_pred - Y))  # sum of the squares
## optimizer
optimizer = tf.train.AdamOptimizer(learning_rate)
train = optimizer.minimize(loss)
#
## RMSE
targets = tf.placeholder(tf.float32, [None, 1])
predictions = tf.placeholder(tf.float32, [None, 1])
rmse = tf.sqrt(tf.reduce_mean(tf.square(targets - predictions)))

with tf.Session() as sess:
    saver = tf.train.Saver(max_to_keep=41)
    writer = tf.summary.FileWriter('./laos_2out/cnntest', sess.graph)
    init = tf.global_variables_initializer()
    sess.run(init)

    # Training step
    total_batches = int(train_size / batch_size)  ##total batches/ no. of steps in an epoch
    for epoch in range(tot_epochs):
        #for batch in range(total_batches):
        # Each epoch is a single step over the whole training set.
        _, step_loss = sess.run([train, loss], feed_dict={x_placeholders: trainX, Y: trainY, keep_prob: 0.5})
        print('epoch: # {} loss: {}'.format(epoch, step_loss))

    # # evaluating on test data
    # Y_pred does not depend on Y, so no labels are fed (the training labels
    # were previously passed alongside the test inputs).
    test_predict = sess.run(Y_pred, feed_dict={x_placeholders: testX, keep_prob: 0.5})
    #evaluating on training data
    train_predict = sess.run(Y_pred, feed_dict={x_placeholders: trainX, keep_prob: 0.5})
    rmse_val = sess.run(rmse, feed_dict={targets: testY, predictions: test_predict})
    print("RMSE: {}".format(rmse_val))

    # Flush and release the event file.
    writer.close()

# Plot predictions
fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
fig.set_figwidth(14)
fig.set_figheight(5)
ax2.plot(testY, 'b', label='observed')
ax2.plot(test_predict, 'k', label='predicted')
ax2.legend(loc="best")
ax2.set_xlabel("Time Period")
ax2.set_title('Testing')
ax1.plot(trainY, 'b', label='observed')
ax1.plot(train_predict, 'k', label='predicted')
ax1.legend(loc="best")
ax1.set_xlabel("Time Period")
ax1.set_ylabel("discharge (cms)")
ax1.set_title('Training')
plt.show()
The problem is that while training, the model focuses on the majority of values, i.e. 0s, and thus makes the predictions equal to 0. How can I make the model focus on non-zero values (positive surface flow) while at the same time also considering 0s (when there is no surface flow)? I have read about the attention mechanism but have not understood how I can implement it in such scenarios.

Tensorflow: Can't overfit training data with batch size > 1

I coded a small RNN network with Tensorflow to return the total energy consumption given some parameters. There seem to be a problem in my code. It can't overfit the training data when I use a batch size > 1 (even with only 4 samples!). In the code below, the loss value reaches 0 when I set BatchSize to 1. However, by setting BatchSize to 2, the network fails to overfit and the loss value goes toward 12.500000 and gets stuck there forever.
I suspect this has something to do with LSTM states. I get the same problem if I don't update the state with each iteration. Or maybe the cost function? A help is appreciated. Thanks.
import tensorflow as tf
import numpy as np
import os
from utils import loadData
# Hyper-parameters
Epochs = 10000
LearningRate = 0.0001
MaxGradNorm = 5
SeqLen = 1
NChannels = 28
NClasses = 1
NLayers = 2
NUnits = 256
BatchSize = 1
NumSamples = 4
#################################################################
trainingFile = "./training.dat"
X_values, Y_values = loadData(trainingFile, SeqLen, NumSamples)

# The batch dimension is fixed to BatchSize, so every fed batch must contain
# exactly BatchSize samples.
X = tf.placeholder(tf.float32, [BatchSize, SeqLen, NChannels], name='inputs')
Y = tf.placeholder(tf.float32, [BatchSize, SeqLen, NClasses], name='labels')
keep_prob = tf.placeholder(tf.float32, name='keep')
initializer = tf.contrib.layers.xavier_initializer()

# static_rnn expects a list with one [batch, channels] tensor per time step.
Xin = tf.unstack(tf.transpose(X, perm=[1, 0, 2]))

lstm_layers = []
for _ in range(NLayers):
    lstm_layer = tf.nn.rnn_cell.LSTMCell(num_units=NUnits, initializer=initializer, use_peepholes=True, state_is_tuple=True)
    dropout_layer = tf.contrib.rnn.DropoutWrapper(lstm_layer, output_keep_prob=keep_prob)
    #[LSTM ---> DROPOUT] ---> [LSTM ---> DROPOUT] ---> etc...
    lstm_layers.append(dropout_layer)

rnn = tf.nn.rnn_cell.MultiRNNCell(lstm_layers, state_is_tuple=True)
initial_state = rnn.zero_state(BatchSize, tf.float32)
outputs, final_state = tf.nn.static_rnn(rnn, Xin, dtype=tf.float32, initial_state=initial_state)

# Swap the first two axes of the stacked per-step outputs, then flatten to
# [-1, NUnits] for the linear output layer.
outputs = tf.transpose(outputs, [1, 0, 2])
outputs = tf.reshape(outputs, [-1, NUnits])
weight = tf.Variable(tf.truncated_normal([NUnits, NClasses]))
bias = tf.Variable(tf.constant(0.1, shape=[NClasses]))
prediction = tf.matmul(outputs, weight) + bias
prediction = tf.reshape(prediction, [BatchSize, SeqLen, NClasses])

# Half the summed squared error, averaged over the batch.
cost = tf.reduce_sum(tf.pow(tf.subtract(prediction, Y), 2)) / (2 * BatchSize)

# Clip the global gradient norm before applying the Adam update.
tvars = tf.trainable_variables()
grad, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), MaxGradNorm)
optimizer = tf.train.AdamOptimizer(learning_rate=LearningRate)
train_step = optimizer.apply_gradients(zip(grad, tvars))

# The session is a context manager so it is closed when training ends.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    iteration = 1
    for e in range(0, Epochs):
        train_loss = []
        # Start every epoch from the zero state, then carry the LSTM state
        # from one batch to the next.
        state = sess.run(initial_state)
        # range() works on Python 2 and 3; xrange() exists only on Python 2.
        for i in range(0, len(X_values), BatchSize):
            x = X_values[i:i + BatchSize]
            y = Y_values[i:i + BatchSize]
            # Add a trailing axis to match the labels placeholder
            # (assumes Y_values is (samples, SeqLen) — TODO confirm).
            y = np.expand_dims(y, 2)
            feed = {X: x, Y: y, keep_prob: 1.0, initial_state: state}
            _, loss, state, pred = sess.run([train_step, cost, final_state, prediction], feed_dict=feed)
            train_loss.append(loss)
            iteration += 1
        print("Epoch: {}/{}".format(e, Epochs), "Iteration: {:d}".format(iteration), "Train average rmse: {:6f}".format(np.mean(train_loss)))
Normalizing the input data solved the problem.

Implementing LSTM regression model with tensor flow

I am trying to implement a TensorFlow LSTM regression model for a list of input numbers.
example:
input_data = [1, 2, 3, 4, 5]
time_steps = 2
-> X == [[1, 2], [2, 3], [3, 4]]
-> y == [3, 4, 5]
The code is below:
# NOTE(review): tf, np, plt, load_data and mean_squared_error are not defined
# in this excerpt — presumably imported earlier; confirm before running.
TIMESTEPS = 20
num_hidden = 20

Xd, yd = load_data()
train_input = Xd['train']
train_input = train_input.reshape(-1, TIMESTEPS, 1)
train_output = yd['train']
# train_input = [[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20],..
# train_output = [[21],[22],[23]....
# Test inputs get the same (samples, TIMESTEPS, 1) layout as training inputs.
test_input = Xd['test']
test_input = test_input.reshape(-1, TIMESTEPS, 1)
test_output = yd['test']

X = tf.placeholder(tf.float32, [None, TIMESTEPS, 1])
y = tf.placeholder(tf.float32, [None, 1])
n_outputs = int(y.get_shape()[1])

cell = tf.nn.rnn_cell.LSTMCell(num_hidden, state_is_tuple=True)
val, state = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
# The tf.Print ops below only log debug information (first 7 evaluations).
val = tf.Print(val, [tf.argmax(val, 1)], 'argmax(val)=', summarize=20, first_n=7)
# Move the time axis first so the last time step can be gathered.
val = tf.transpose(val, [1, 0, 2])
val = tf.Print(val, [tf.argmax(val, 1)], 'argmax(val2)=', summarize=20, first_n=7)
# Take only the last output after 20 time steps
last = tf.gather(val, int(val.get_shape()[0]) - 1)
last = tf.Print(last, [tf.argmax(last, 1)], 'argmax(val3)=', summarize=20, first_n=7)

# define variables for weights and bias
weight = tf.Variable(tf.truncated_normal([num_hidden, n_outputs]))
bias = tf.Variable(tf.constant(0.1, shape=[n_outputs]))
# Prediction is matmul of last value and weight, plus bias
prediction = tf.matmul(last, weight) + bias

# Mean squared error. This is a regression on real-valued targets, so the
# previous cross-entropy on log(prediction) was not a meaningful loss (and
# log() is undefined for non-positive predictions).
cost = tf.reduce_mean(tf.square(prediction - y))
optimizer = tf.train.AdamOptimizer()
minimize = optimizer.minimize(cost)

from tensorflow.python import debug as tf_debug
# tf.initialize_all_variables is deprecated in favour of this initializer.
inita = tf.global_variables_initializer()
sess = tf.Session()
sess.run(inita)

batch_size = 100
no_of_batches = int(len(train_input) / batch_size)
epoch = 10
test_size = 100
for i in range(epoch):
    for start, end in zip(range(0, len(train_input), batch_size), range(batch_size, len(train_input) + 1, batch_size)):
        sess.run(minimize, feed_dict={X: train_input[start:end], y: train_output[start:end]})
    test_indices = np.arange(len(test_input))  # Get A Test Batch
    np.random.shuffle(test_indices)
    test_indices = test_indices[0:test_size]
    # Compare predictions with the targets themselves; taking argmax over a
    # single-column target always yields 0.
    print(i, mean_squared_error(test_output[test_indices], sess.run(prediction, feed_dict={X: test_input[test_indices]})))

print("predictions", prediction.eval(feed_dict={X: train_input}, session=sess))
y_pred = prediction.eval(feed_dict={X: test_input}, session=sess)
sess.close()

test_size = test_output.shape[0]
ax = np.arange(0, test_size, 1)
plt.plot(ax, test_output, 'r', ax, y_pred, 'b')
plt.show()
But I am not able to minimize the cost; the calculated MSE increases at each step instead of decreasing.
I suspect there is a problem with the cost function that I am using.
Any thoughts or suggestions as to what I am doing wrong?
Thanks
As mentioned in the comment, you had to change your loss function to the MSE function and reduce your learning rate. Is your error converging to zero ?

How can I have visualized embeddings on tensorboard? Not MNIST data

I'm trying to create visualized graph on Tensorboard embeddings, I'm using csv data, not MNIST data, the data in csv is like follows:
0.266782506,"1,0"
0.361942522,"0,1"
0.862076491,"0,1"
The data in first column like 0.366782506 is sample input_data x, and "0,1" is the one-hot label y. while 0
I tried to find references on how to create a visualized graph with the embedding projector on TensorBoard, but I found only examples that use MNIST data, so I'm looking for help: can anyone give guidance on how to create a visualized embedding graph on TensorBoard?
I can have SCALAR, GRAPH and HISTOGRAM visualized on TensorBoard with my code as follows:
# coding=utf-8
import tensorflow as tf
import numpy
import os
import csv
import shutil
from tensorflow.contrib.tensorboard.plugins import projector
#Reading data from csv:
# Each row holds a float input and a quoted one-hot label such as "1,0".
# Raw strings keep the Windows backslashes literal; `with` closes the file.
t_X, t_Y = [], []
with open(r'D:\Program Files (x86)\logistic\sample_1.csv', 'r') as csv_file:
    for row in csv.reader(csv_file):
        # Convert to numbers here; the values used to be stored as strings.
        t_X.append(float(row[0]))
        t_Y.append([int(v) for v in row[1].split(',')])
t_X = numpy.asarray(t_X)
t_Y = numpy.asarray(t_Y)
# Column vector of inputs, shape (n_samples, 1), matching the x placeholder.
t_XT = numpy.transpose([t_X])

# Parameters
learning_rate = 0.01
training_epochs = 5
batch_size = 50
display_step = 1
n_samples = t_X.shape[0]

sess = tf.InteractiveSession()

with tf.name_scope('Input'):
    with tf.name_scope('x_input'):
        x = tf.placeholder(tf.float32, [None, 1], name='x_input')
    with tf.name_scope('y_input'):
        y = tf.placeholder(tf.float32, [None, 2], name='y_input')

# Weight
with tf.name_scope('layer1'):
    with tf.name_scope('weight'):
        W = tf.Variable(tf.random_normal([1, 2], dtype=tf.float32), name='weight')
    with tf.name_scope('bias'):
        b = tf.Variable(tf.random_normal([2], dtype=tf.float32), name='bias')

# model
with tf.name_scope('Model'):
    with tf.name_scope('pred'):
        pred = tf.nn.softmax(tf.matmul(x, W) + b, name='pred')
    with tf.name_scope('cost'):
        # Cross-entropy between the one-hot labels and the softmax output.
        cost = tf.reduce_mean(-tf.reduce_sum(y * tf.log(pred), reduction_indices=1), name='cost')
        tf.summary.scalar('cost', cost)
        tf.summary.histogram('cost', cost)
    with tf.name_scope('optimizer'):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# Calculate accuracy
with tf.name_scope('accuracy_count'):
    with tf.name_scope('correct_prediction'):
        correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    with tf.name_scope('accuracy'):
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar('accuracy', accuracy)
        tf.summary.histogram('accuracy', accuracy)

init = tf.global_variables_initializer()
merged = tf.summary.merge_all()
sess.run(init)
writer = tf.summary.FileWriter(r'D:\Tensorlogs\logs', sess.graph)

for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = int(n_samples / batch_size)
    summary = None
    for batch_index in range(total_batch):
        start = batch_index * batch_size
        batch_x = t_XT[start:start + batch_size]
        batch_y = t_Y[start:start + batch_size]
        summary, _, batch_cost = sess.run([merged, optimizer, cost],
                                          feed_dict={x: batch_x, y: batch_y})
        avg_cost += batch_cost / total_batch
    # Log the summary of the epoch's last batch, if any batch was run.
    if summary is not None:
        writer.add_summary(summary, epoch + 1)
    if (epoch + 1) % display_step == 0:
        # W, b and the accuracy are fetched from the graph here; the names
        # wr, br and accuracy_s used before were never defined.
        print("Epoch:", '%04d' % (epoch + 1), "cost=", avg_cost,
              "W=", sess.run(W), "b=", sess.run(b),
              "accuracy_s=", accuracy.eval(feed_dict={x: t_XT, y: t_Y}))

# Flush pending events so TensorBoard sees every summary.
writer.close()
print("Optimization Finished!")
Thank you very much!