TensorFlow convergence is stuck at a high value - tensorflow

I was playing around with TensorFlow and I was looking at the tutorial from:
https://github.com/aymericdamien/TensorFlow-Examples/tree/0.11/examples/3_NeuralNetworks
Because I did not want to use the MNIST database, I changed the script to use some data I created, with 8000 training samples. The evaluation is done with 300 test samples. The output is a binary classification. Bear in mind that I have just dived into machine learning and that my knowledge is quite limited for now.
The script runs fine, but my cost is stuck at a very high value and does not converge to 0. First, is this normal? How can I improve it? Did I do something wrong?
Second, the accuracy is not very good either; is that due to the poor convergence? Maybe 8000 samples are not enough to train the model, or the values are too scattered to actually get a better accuracy.
I found a similar problem here:
tensorflow deep neural network for regression always predict same results in one batch
but I do not understand why or how this problem applies to me.
Could someone help me please?
Here is the output:
Starting 1st session...
Epoch: 0001 cost= 39926820.730
and at the end I get:
Epoch: 0671 cost= 64.798
Epoch: 0681 cost= 64.794
Epoch: 0691 cost= 64.791
Optimization Finished!
Accuracy: 0.716621
The code is as follows:
import tensorflow as tf
import pandas as pd
import numpy as np
import csv

inputData = pd.read_csv('./myInputDataNS.csv', header=None)
runData = pd.read_csv('./myTestDataNS.csv', header=None)

trX, trY = inputData.iloc[:, :7].values, inputData.iloc[:, 7].values
temp = trY.shape
trY = trY.reshape(temp[0], 1)
trY = np.concatenate((1-trY, trY), axis=1)

teX, teY = runData.iloc[:, :7].values, runData.iloc[:, 7].values
temp = teY.shape
teY = teY.reshape(temp[0], 1)
teY = np.concatenate((1-teY, teY), axis=1)

# Parameters
learning_rate = 0.001
training_epochs = 700
batch_size = 100
display_step = 10

# Network Parameters
n_hidden_1 = 320
n_hidden_2 = 320
n_hidden_3 = 320
n_input = 7
n_classes = 2  # (0 or 1)

x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None, n_classes])

def multilayer_perceptron(x, weights, biases):
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    layer_3 = tf.add(tf.matmul(layer_2, weights['h3']), biases['b3'])
    layer_3 = tf.nn.relu(layer_3)
    out_layer = tf.matmul(layer_3, weights['out']) + biases['out']
    return out_layer

weights = {
    'h1': tf.Variable(tf.random_normal([len(trX[0]), n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'h3': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_3])),
    'out': tf.Variable(tf.random_normal([n_hidden_3, n_classes]))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'b3': tf.Variable(tf.random_normal([n_hidden_3])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

pred = multilayer_perceptron(x, weights, biases)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

init = tf.global_variables_initializer()

print("Starting 1st session...")
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(training_epochs):
        epoch_loss = 0
        i = 0
        while i < len(trX):
            start = i
            end = i + batch_size
            batch_x = np.array(trX[start:end])
            batch_y = np.array(trY[start:end])
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
            epoch_loss += c
            i += batch_size
        epoch_loss += c / len(trX[0])
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.3f}".format(epoch_loss))
    print("Optimization Finished!")
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Accuracy:", accuracy.eval({x: teX, y: teY}))

Related

Implement TFlearn imdb lstm example by tensorflow

I'm reimplementing tflearn's LSTM imdb example in TensorFlow.
I used the same dataset, architecture and hyper-parameters (embedding size, maximum sentence length, and so on) as the tflearn model does, but my model's performance is poorer than the tflearn example's (after 10 epochs, my model got about 52% accuracy while the example got near 80%).
I'd appreciate it a lot if you could give me some advice on how to reach the performance of the example.
Below is my code:
import tensorflow as tf
from tflearn.data_utils import to_categorical, pad_sequences
from tflearn.datasets import imdb
from tensorflow.contrib.rnn import BasicLSTMCell
import time

n_class = 2
n_words = 10000
EMBEDDING_SIZE = 128
HIDDEN_SIZE = 128
MAX_LENGTH = 100
lr = 1e-3
epoch = 10
TRAIN_SIZE = 22500
validation_size = 2500
batch_size = 128
KP = 0.8

# IMDB Dataset loading
train, test, _ = imdb.load_data(path='imdb.pkl', n_words=n_words,
                                valid_portion=0.1, sort_by_len=False)
trainX, trainY = train
validationX, validationY = test
testX, testY = _

# Data preprocessing
# Sequence padding
trainX = pad_sequences(trainX, maxlen=MAX_LENGTH, value=0.)
validationX = pad_sequences(validationX, maxlen=MAX_LENGTH, value=0.)
testX = pad_sequences(testX, maxlen=MAX_LENGTH, value=0.)

# Converting labels to binary vectors
trainY = to_categorical(trainY, n_class)
validationY = to_categorical(validationY, n_class)
testY = to_categorical(testY, n_class)

graph = tf.Graph()
with graph.as_default():
    # input
    text = tf.placeholder(tf.int32, [None, MAX_LENGTH])
    labels = tf.placeholder(tf.float32, [None, n_class])
    keep_prob = tf.placeholder(tf.float32)

    embeddings_var = tf.Variable(tf.truncated_normal([n_words, EMBEDDING_SIZE]), trainable=True)
    text_embedded = tf.nn.embedding_lookup(embeddings_var, text)
    print(text_embedded.shape)  # [batch_size, length, embedding_size]
    word_list = tf.unstack(text_embedded, axis=1)

    cell = BasicLSTMCell(HIDDEN_SIZE)
    dropout_cell = tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob, output_keep_prob=keep_prob)
    outputs, encoding = tf.nn.static_rnn(dropout_cell, word_list, dtype=tf.float32)
    logits = tf.layers.dense(outputs[-1], n_class, activation=None)

    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))
    optimizer = tf.train.AdamOptimizer(lr).minimize(loss)
    prediction = tf.argmax(logits, 1)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(prediction, tf.argmax(labels, 1)), tf.float32))

train_steps = epoch * TRAIN_SIZE // batch_size + 1
print("Train steps: ", train_steps)

with tf.Session(graph=graph) as sess:
    tf.global_variables_initializer().run()
    print("Initialized!")
    s = time.time()
    offset = 0
    for step in range(train_steps):
        offset = (offset * step) % (TRAIN_SIZE - batch_size)
        batch_text = trainX[offset: offset + batch_size, :]
        batch_label = trainY[offset: offset + batch_size, :]
        fd = {text: batch_text, labels: batch_label, keep_prob: KP}
        _, l, acc = sess.run([optimizer, loss, accuracy], feed_dict=fd)
        if step % 100 == 0:
            print("Step: %d loss: %f accuracy: %f" % (step, l, acc))
        if step % 500 == 0:
            v_l, v_acc = sess.run([loss, accuracy], feed_dict={
                text: validationX,
                labels: validationY,
                keep_prob: 1.0
            })
            print("------------------------------------------------")
            print("Validation: step: %d loss: %f accuracy: %f" % (step, v_l, v_acc))
            print("------------------------------------------------")
    print("Training finished, time consumed:", time.time() - s, " s")
    print("Test accuracy: %f" % accuracy.eval(feed_dict={
        text: testX,
        labels: testY,
        keep_prob: 1.0
    }))
Sorry, I made a stupid mistake!
The loss :
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))
is supposed to be
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
Then the accuracy is in line with the tflearn example.
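For anyone who hits the same thing: with one-hot two-class labels, sigmoid_cross_entropy_with_logits scores each of the two logits as an independent binary problem, while softmax_cross_entropy_with_logits normalizes the logits against each other, which is what an argmax-based accuracy assumes. Below is a rough sketch in plain NumPy with made-up logits, only to show the two formulas side by side (it mirrors the math behind the two TensorFlow ops, ignoring their numerically stable implementations).
import numpy as np

def sigmoid_ce(logits, labels):
    # element-wise binary cross-entropy, averaged over every logit
    p = 1.0 / (1.0 + np.exp(-logits))
    return np.mean(-labels * np.log(p) - (1 - labels) * np.log(1 - p))

def softmax_ce(logits, labels):
    # per-row cross-entropy against the softmax of the logits
    z = logits - logits.max(axis=1, keepdims=True)  # stabilise the exponentials
    log_softmax = z - np.log(np.exp(z).sum(axis=1, keepdims=True))
    return np.mean(-(labels * log_softmax).sum(axis=1))

# made-up logits for 3 samples, one-hot labels for 2 classes
logits = np.array([[2.0, -1.0], [0.5, 0.4], [-3.0, 1.0]])
labels = np.array([[1.0, 0.0], [0.0, 1.0], [0.0, 1.0]])
print("sigmoid-style loss:", sigmoid_ce(logits, labels))
print("softmax-style loss:", softmax_ce(logits, labels))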

Tensorflow Python type error, help me understand what's wrong

Hello fellow Tensorflow specialists!
I have the following code, mainly copied from an example. I've got 38 inputs and 4 outputs in my NN. I want to train the NN and make predictions. But when I run the program I get an error saying:
b = tf.Variable(tf.float32, [None, n_class])
TypeError: Expected binary or unicode string, got tf.float32
together with:
TypeError: Failed to convert object of type to Tensor. Contents: . Consider casting elements to a supported type.
import csv
import matplotlib as plt
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

with open('input.csv', "r") as myfile:
    reader = csv.reader(myfile, delimiter=",")
    data = list(reader)

data = np.asarray(data)
x = data[:, 0:39]
y = data[:, 39:43]

train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.05)

learning_rate = 0.3
training_epochs = 1000
cost_history = np.empty(shape=[1], dtype=float)
n_dim = x.shape[1]
print("n_dim =", n_dim)
n_class = 2
# model_path =
n_hidden_1 = 60
n_hidden_2 = 60
n_hidden_3 = 60

X = tf.placeholder(tf.float32, [None, n_dim])
W = tf.Variable(tf.zeros([n_dim, n_class]))
b = tf.Variable(tf.float32, [None, n_class])
y_ = tf.placeholder(tf.float32, [None, n_class])

def multilayer_perceptron(x, weights, biases):
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.sigmoid(layer_1)
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.sigmoid(layer_2)
    layer_3 = tf.add(tf.matmul(layer_2, weights['h3']), biases['b3'])
    layer_3 = tf.nn.relu(layer_3)
    out_layer = tf.matmul(layer_3, weights['out']) + biases['out']
    return out_layer

weights = {
    'h1': tf.Variable(tf.truncated_normal([n_dim, n_hidden_1])),
    'h2': tf.Variable(tf.truncated_normal([n_hidden_1, n_hidden_2])),
    'h3': tf.Variable(tf.truncated_normal([n_hidden_2, n_hidden_3])),
    'out': tf.Variable(tf.truncated_normal([n_hidden_3, n_class]))
}
biases = {
    'b1': tf.Variable(tf.truncated_normal([n_hidden_1])),
    'b2': tf.Variable(tf.truncated_normal([n_hidden_2])),
    'b3': tf.Variable(tf.truncated_normal([n_hidden_3])),
    'out': tf.Variable(tf.truncated_normal([n_class]))
}

init = tf.global_variables_initializer()
saver = tf.train.Saver()
cost_func = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))
training_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_func)

sess = tf.Session()
sess.run(init)

mse_history = []
accuracy_history = []
for epoch in range(training_epochs):
    sess.run(training_step, feed_dict={x: train_x, y_: train_y})
    cost = sess.run(cost_func, feed_dict={x: train_x, y_: train_y})
    cost_history = np.append(cost_history, cost)
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    pred_y = sess.run(y, feed_dict={x: test_x})
    mse = tf.reduce_mean(tf.square(pred_y - test_y))
    mse_ = sess.run(mse)
    mse_history.append(mse_)
    accuracy = (sess.run(accuracy, feed_dict={x: train_x, y_: train_y}))
    accuracy_history.append(accuracy)
    print('epoch: ', epoch, ' - cost: ', cost, " - MSE: ", mse_, " - Train Accuracy: ", accuracy)

correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print("Test Accuracy: ", (sess.run(accuracy, feed_dict={x: test_x, y_: test_y})))

pred_y = sess.run(y, feed_dict={x: test_x})
mse = tf.reduce_mean(tf.square((pred_y - test_y)))
print("MSE %.4f" % sess.run(mse))
Do you have any suggestions what to do next?

tensorflow - NN => accuracy print error

I am building a neural network with TensorFlow: multiple inputs => linear regression.
I am not following a TensorFlow example exactly; I just want to get this example working as a check.
(My input data are fruit, water and vegetable measurements, and the output value is a real number, a concentration, so I think this example is similar.
If you have a better example, please share it. Thank you.)
When this code prints the accuracy, it throws an error.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tensorflow.contrib import learn
from sklearn.model_selection import train_test_split

boston = learn.datasets.load_dataset('boston')
x, y = boston.data, boston.target
X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size=0.6, random_state=42)
total_len = X_train.shape[0]

# Parameters
learning_rate = 0.001
training_epochs = 500
batch_size = 10
display_step = 1
dropout_rate = 0.9

# Network Parameters
n_hidden_1 = 32   # 1st layer number of features
n_hidden_2 = 200  # 2nd layer number of features
n_hidden_3 = 200
n_hidden_4 = 256
n_input = X_train.shape[1]
n_classes = 1

# tf Graph input
x = tf.placeholder("float", [None, 13])
y = tf.placeholder("float", [None])

# Create model
def multilayer_perceptron(x, weights, biases):
    # Hidden layer with RELU activation
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    # Hidden layer with RELU activation
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    # Hidden layer with RELU activation
    layer_3 = tf.add(tf.matmul(layer_2, weights['h3']), biases['b3'])
    layer_3 = tf.nn.relu(layer_3)
    # Hidden layer with RELU activation
    layer_4 = tf.add(tf.matmul(layer_3, weights['h4']), biases['b4'])
    layer_4 = tf.nn.relu(layer_4)
    # Output layer with linear activation
    out_layer = tf.matmul(layer_4, weights['out']) + biases['out']
    return out_layer

# Store layers weight & bias
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], 0, 0.1)),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], 0, 0.1)),
    'h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3], 0, 0.1)),
    'h4': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_4], 0, 0.1)),
    'out': tf.Variable(tf.random_normal([n_hidden_4, n_classes], 0, 0.1))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1], 0, 0.1)),
    'b2': tf.Variable(tf.random_normal([n_hidden_2], 0, 0.1)),
    'b3': tf.Variable(tf.random_normal([n_hidden_3], 0, 0.1)),
    'b4': tf.Variable(tf.random_normal([n_hidden_4], 0, 0.1)),
    'out': tf.Variable(tf.random_normal([n_classes], 0, 0.1))
}

# Construct model
pred = multilayer_perceptron(x, weights, biases)

# Define loss and optimizer
cost = tf.reduce_mean(tf.square(tf.transpose(pred) - y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# # Initializing the variables
# init = tf.global_variables_initializer()

# Launch the graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(total_len/batch_size)
        # Loop over all batches
        for i in range(total_batch-1):
            batch_x = X_train[i*batch_size:(i+1)*batch_size]
            batch_y = Y_train[i*batch_size:(i+1)*batch_size]
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c, p = sess.run([optimizer, cost, pred], feed_dict={x: batch_x,
                                                                   y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch
        # sample prediction
        label_value = batch_y
        estimate = p
        err = label_value - estimate
        print("num batch:", total_batch)
        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=",
                  "{:.9f}".format(avg_cost))
            print("[*]----------------------------")
            for i in range(3):
                print("label value:", label_value[i],
                      "estimated value:", estimate[i])
            print("[*]============================")
    print("Optimization Finished!")

# Test model
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print("Accuracy:", accuracy.eval({x: X_test, y: Y_test}))
You compute the accuracy outside of the session.
Move it under the with tf.Session() as sess: block.
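For reference, a minimal sketch of that change, keeping the rest of the script as posted (this only addresses the missing default session; whether an argmax-based accuracy makes sense for a single-output regression is a separate question):
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # ... training cycle exactly as above ...
    print("Optimization Finished!")

    # Test model: evaluate while this session is still the default session,
    # otherwise accuracy.eval() has nothing to run in.
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Accuracy:", accuracy.eval({x: X_test, y: Y_test}))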

Solve a prediction task using Tensor Flow

I've been trying to solve a prediction/regression problem using TensorFlow, but I'm facing some problems. Let me give you some context before I explain my actual problem.
The data I've been playing with is a set of 5 features, let's call them [f1, f2, f3, f4, f5], representing in some way a particular phenomenon identified by a real value (target).
What I've been trying to do is train a multilayer perceptron that learns the relationship between the features and the target values. In a nutshell, I'd like to predict real values based on what the neural network has seen during the training phase.
I've identified this as a prediction/regression problem and wrote the following code:
# picking device to run on
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

# Parameters
learning_rate = 0.001
training_epochs = 99999
batch_size = 4096
STDDEV = 0.1

# Network Parameters
n_hidden_1 = 10  # 1st layer number of neurons
n_hidden_2 = 10  # 2nd layer number of neurons
n_hidden_3 = 10  # 3rd layer number of neurons
n_hidden_4 = 10  # 4th layer number of neurons
n_hidden_5 = 10  # 5th layer number of neurons
n_input = 5      # number of features
n_classes = 1    # one target value (float)

# tf Graph input
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None, n_classes])

# LOADING DATA
data_train = loader.loadDataset(dir_feat, train_path, 'TRAIN', features)
data_test = loader.loadDataset(dir_feat, test_path, 'TEST', features)
valid_period = 5
test_period = 10

def multilayer_perceptron(x, weights, biases):
    # Hidden layers with sigmoid activation
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.sigmoid(layer_1)
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.sigmoid(layer_2)
    layer_3 = tf.add(tf.matmul(layer_2, weights['h3']), biases['b3'])
    layer_3 = tf.nn.sigmoid(layer_3)
    layer_4 = tf.add(tf.matmul(layer_3, weights['h4']), biases['b4'])
    layer_4 = tf.nn.sigmoid(layer_4)
    layer_5 = tf.add(tf.matmul(layer_4, weights['h5']), biases['b5'])
    layer_5 = tf.nn.sigmoid(layer_5)
    # Output layer with linear activation
    out = tf.matmul(layer_5, weights['out']) + biases['out']
    return out

# Store layers weight & bias
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], stddev=STDDEV)),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], stddev=STDDEV)),
    'h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3], stddev=STDDEV)),
    'h4': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_4], stddev=STDDEV)),
    'h5': tf.Variable(tf.random_normal([n_hidden_4, n_hidden_5], stddev=STDDEV)),
    'out': tf.Variable(tf.random_normal([n_hidden_5, n_classes], stddev=STDDEV))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'b3': tf.Variable(tf.random_normal([n_hidden_3])),
    'b4': tf.Variable(tf.random_normal([n_hidden_4])),
    'b5': tf.Variable(tf.random_normal([n_hidden_5])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

# Construct model
pred = multilayer_perceptron(x, weights, biases)

def RMSE():
    return tf.sqrt(tf.reduce_mean(tf.square(y - pred)))

cost = RMSE()
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)

# Initializing the variables
init = tf.initialize_all_variables()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    # Training cycle
    for epoch in range(1, training_epochs):
        avg_cost = 0.
        avg_R_square_train = []
        train_dataset = loader.Dataset(data=data_train, batch_size=batch_size, num_feats=n_input)
        total_batch = train_dataset.getNumberBatches()
        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = train_dataset.next_batch(update=True)
            # Run optimization op (backprop) and cost op (to get loss value)
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
            c_train = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
            # Compute average loss
            avg_cost += c_train / total_batch
        print("Epoch:" + str(epoch) + ", TRAIN_loss = {:.9f}".format(avg_cost))
        # TESTING
        if epoch % test_period == 0:
            c_test = sess.run(cost, feed_dict={x: data_test[0][0], y: data_test[0][1]})
            print("Epoch:" + str(epoch) + ", TEST_loss = {:.9f}".format(c_test))
The problem I've encountered is that the cost function for the test set gets stuck in a local minimum after some iterations and doesn't decrease anymore.
Epoch:6697, TRAIN_loss = 2.162182076
Epoch:6698, TRAIN_loss = 2.156500859
Epoch:6699, TRAIN_loss = 2.157814605
Epoch:6700, TRAIN_loss = 2.160744122
Epoch:6700, TEST_loss = 2.301288128
Epoch:6701, TRAIN_loss = 2.139338647
...
Epoch:6709, TRAIN_loss = 2.166410744
Epoch:6710, TRAIN_loss = 2.162357884
Epoch:6710, TEST_loss = 2.301478863
Epoch:6711, TRAIN_loss = 2.143475396
...
Epoch:6719, TRAIN_loss = 2.145476401
Epoch:6720, TRAIN_loss = 2.150237552
Epoch:6720, TEST_loss = 2.301517725
Epoch:6721, TRAIN_loss = 2.151232243
...
Epoch:6729, TRAIN_loss = 2.163080522
Epoch:6730, TRAIN_loss = 2.160523321
Epoch:6730, TEST_loss = 2.301782370
...
Epoch:6739, TRAIN_loss = 2.156920952
Epoch:6740, TRAIN_loss = 2.162290675
Epoch:6740, TEST_loss = 2.301943779
...
I've tried changing several hyper-parameters, such as the number of hidden layers and/or the number of nodes, the learning rate, the batch size, etc., but the situation doesn't change at all. I have also tried other loss functions such as MAE and MSE.
The number of data samples I have is roughly 270,000.
Can someone please suggest how to solve this problem or give me some useful advice about it?
Thanks in advance.
Davide
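As a point of reference, the three losses mentioned above (the posted RMSE, plus the MAE and MSE that were also tried) can all be written against the same pred and y tensors; a minimal sketch in the same TF 1.x style, shown only to make the comparison concrete:
# assumes the pred and y tensors defined in the question
rmse = tf.sqrt(tf.reduce_mean(tf.square(y - pred)))   # the posted cost
mse  = tf.reduce_mean(tf.square(y - pred))            # mean squared error
mae  = tf.reduce_mean(tf.abs(y - pred))               # mean absolute error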

Tensorflow neural network has high error even on a really easy dataset

I'm trying to implement a 1-hidden-layer NN for a regression problem. The loss function improves for a few iterations, then it gets stuck at a really high error even for very easy data. Could someone help me find the bug? Here is my code:
import tensorflow as tf
import scipy.io as sio
import numpy as np

reuse_weights = 1
n_nodes_hl1 = 10
batch_size = 200
hm_epochs = 20

# load input from matlab
input_training = sio.loadmat('xMat.mat')
input_training = input_training['xMat']
input_test = sio.loadmat('xMat.mat')
input_test = input_test['xMat']

# find number of measurements and input length
n_measurements = input_training.shape[0]
input_length = input_training.shape[1]

# current input
data_y = input_training[:, input_length - 1].astype(float)
data_x = input_training[:, 0: input_length - 1].astype(float)
test_data_y = input_test[:, input_length - 1].astype(float)
test_data_x = input_test[:, 0: input_length - 1].astype(float)

x = tf.placeholder('float32', [None, input_length - 1])
y = tf.placeholder('float32')
# place holder for Dropout algorithm drop probability
keep_prob = tf.placeholder('float32')

def next_batch(data):
    """
    Return a total of `batch_size` samples from the array `data`.
    """
    if len(data.shape) == 2:
        idx = np.arange(0, len(data[:, 0]))  # get all possible indexes
    else:
        idx = np.arange(0, len(data))  # get all possible indexes
    np.random.shuffle(idx)  # shuffle indexes
    idx = idx[0:batch_size]  # use only `batch_size` random indexes
    if len(data.shape) == 2:
        data_shuffle = [data[i, :] for i in idx]  # get list of `batch_size` random samples
    else:
        data_shuffle = [data[i] for i in idx]  # get list of `batch_size` random samples
    data_shuffle = np.asarray(data_shuffle)  # get back numpy array
    return data_shuffle

def neural_network_model(data, weights, biases, keep_prob):
    layer1 = tf.add(tf.matmul(data, weights['h1']), biases['b1'])
    layer1 = tf.nn.sigmoid(layer1)
    output = tf.add(tf.matmul(layer1, weights['out']), biases['out'])
    return output

if reuse_weights:
    weights = {
        'h1': tf.Variable(sio.loadmat('weights_h1.mat')['weights_h1'], name="weights_h1"),
        'out': tf.Variable(sio.loadmat('weights_out.mat')['weights_out'], name="weights_out")
    }
    biases = {
        'b1': tf.Variable(sio.loadmat('biases_b1.mat')['biases_b1'], name="biases_b1"),
        'out': tf.Variable(sio.loadmat('biases_out.mat')['biases_out'], name="biases_out")
    }
else:  # initialize weights
    weights = {
        'h1': tf.Variable(tf.random_normal([input_length - 1, n_nodes_hl1]), name="weights_h1"),
        'out': tf.Variable(tf.random_normal([n_nodes_hl1, 1]), name="weights_out")
    }
    biases = {
        'b1': tf.Variable(tf.random_normal([n_nodes_hl1]), name="biases_b1"),
        'out': tf.Variable(tf.random_normal([1]), name="biases_out")
    }

def train_neural_network(x):
    prediction = neural_network_model(x, weights, biases, keep_prob)[:, 0]
    cost = tf.reduce_mean(tf.abs(prediction - y))
    optimizer = tf.train.AdamOptimizer()
    opt = optimizer.minimize(cost)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print(weights['h1'])
        for epoch in range(hm_epochs):  # training
            epoch_loss = 0
            for _ in range(int(n_measurements/batch_size)):
                _, c, p = sess.run([opt, cost, prediction], feed_dict={x: next_batch(data_x),
                                                                       y: next_batch(data_y), keep_prob: 1.0})
                epoch_loss += c
            print('Epoch', epoch, 'completed out of', hm_epochs, 'Average loss:', epoch_loss/int(n_measurements/batch_size))
        # prediction
        accuracy = tf.reduce_mean(tf.abs(prediction - y))
        # Feed 1.0 for keep prob during testing
        print("Training data accuracy:", accuracy.eval({x: data_x, y: data_y, keep_prob: 1.0}))
        print("Training data predictions:", prediction.eval({x: data_x[0:5, :], keep_prob: 1.0}))
        print("Training data:", data_y[0:5])
        # print("Test data accuracy:", accuracy.eval({x: test_data_x, y: test_data_y, keep_prob: 1.0}))
        # save numpy arrays
        sio.savemat('weights_h1.mat', {'weights_h1': weights['h1'].eval()})
        sio.savemat('biases_b1.mat', {'biases_b1': biases['b1'].eval()})
        sio.savemat('weights_out.mat', {'weights_out': weights['out'].eval()})
        sio.savemat('biases_out.mat', {'biases_out': biases['out'].eval()})

train_neural_network(x)
Figured it out: the problem was with the data shuffling. The input and the response were shuffled independently (a separate random shuffle for each next_batch call), so the input data no longer corresponded to the response data.
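In other words, next_batch(data_x) and next_batch(data_y) each draw their own random indexes, so the rows no longer line up. A minimal sketch of one way to fix it, assuming the same data_x/data_y arrays (next_batch_pair is a hypothetical helper, not part of the original script): draw one shared set of indexes and apply it to both arrays.
def next_batch_pair(data_x, data_y, batch_size):
    """Return matching random batches of inputs and targets."""
    # one shared permutation, so data_x[i] still corresponds to data_y[i]
    idx = np.random.permutation(len(data_x))[:batch_size]
    return data_x[idx], data_y[idx]

# inside the training loop:
# batch_x, batch_y = next_batch_pair(data_x, data_y, batch_size)
# _, c, p = sess.run([opt, cost, prediction],
#                    feed_dict={x: batch_x, y: batch_y, keep_prob: 1.0})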