TensorFlow: why use the forward-pass result y rather than the ExponentialMovingAverage forward-pass result average_y as cross_entropy's parameter?

The code is below, using Python 3 (Anaconda Spyder 3.6) and TensorFlow 1.0.0:
"""
Created on Sat Oct 14 11:00:54 2017
#author: Han.H
"""
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
INPUT_NODE = 784
OUTPUT_NODE = 10
LAYER1_NODE = 500
BATCH_SIZE = 100
LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99
REGULARIZATION_RATE = 0.0001  # lambda for L2 regularization
TRAINING_STEPS = 20000
MOVING_AVERAGE_DECAY = 0.99
# Forward pass; when avg_class is None, use the raw weights (no moving average)
def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    if avg_class is None:
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
        return tf.matmul(layer1, weights2) + biases2
    else:
        layer1 = tf.nn.relu(tf.matmul(input_tensor, avg_class.average(weights1)) + avg_class.average(biases1))
        return tf.matmul(layer1, avg_class.average(weights2)) + avg_class.average(biases2)
# Build a 3-layer fully connected NN
def train(mnist):
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')
    weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))
    # Normal forward pass (raw weights)
    y = inference(x, None, weights1, biases1, weights2, biases2)
    global_step = tf.Variable(0, trainable=False)
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # Forward pass using the shadow (moving-average) weights
    average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    # L2 regularization
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    regularization = regularizer(weights1) + regularizer(weights2)
    loss = cross_entropy_mean + regularization
    # Set learning rate with exponential decay
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True)
    # Gradient descent
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')
    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
        test_feed = {x: mnist.test.images, y_: mnist.test.labels}
        for i in range(TRAINING_STEPS):
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("After %d training step(s), validation accuracy using average model is %g" % (i, validate_acc))
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: xs, y_: ys})
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print("After %d training step(s), test accuracy using average model is %g" % (TRAINING_STEPS, test_acc))
def main(argv=None):
    # Main programme here
    mnist = input_data.read_data_sets("F:/python/MNIST_data/", one_hot=True)
    train(mnist)

if __name__ == '__main__':
    main()
This code has no problems and runs well. I just want to know why average_y cannot be used as the logits when computing the cross entropy. I tried it, and the results were terrible: the accuracy stayed around 0.009, as if the weights were still randomly initialized.
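For context, here is a minimal sketch (assuming TF 1.x, with a toy scalar variable w that is not from the question's code) of the ExponentialMovingAverage mechanics involved: apply() creates non-trainable shadow copies, average() reads those shadow copies, and the shadow values only move when the apply() op is run.
import tensorflow as tf

w = tf.Variable(1.0, name='w')                      # raw, trainable weight
ema = tf.train.ExponentialMovingAverage(decay=0.99)
ema_op = ema.apply([w])                             # creates the non-trainable shadow variable
shadow_w = ema.average(w)                           # reads the shadow copy

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.assign(w, 5.0))                     # pretend a training step changed w
    sess.run(ema_op)                                # shadow = 0.99 * 1.0 + 0.01 * 5.0 = 1.04
    print(sess.run([w, shadow_w]))                  # [5.0, 1.04]
In the question's code, the gradient step updates the raw weights that produce y, while average_y is computed from these lagging shadow copies.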

Related

Recurrent neural network time series prediction with newer TensorFlow 1.14

How do I use the new tf.keras API with a recurrent neural network? I have checked the documentation, but there is no example of such a situation.
There is the great book Hands-On Machine Learning from 2017. Since then the TensorFlow API has evolved, and I am trying to rewrite its recurrent neural network for time series prediction using version 1.14.
The code from the book uses the older tf.nn.dynamic_rnn and tf.nn.rnn_cell.BasicRNNCell:
n_steps = 20
n_inputs = 1
n_neurons = 100
n_outputs = 1
learning_rate = 0.001
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])
cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)
rnn_outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])
stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)
outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])
loss = tf.reduce_mean(tf.square(outputs - y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_iterations = 500
batch_size = 50
with tf.Session() as sess:
    init.run()
    for iteration in range(n_iterations):
        X_batch, y_batch = next_batch(batch_size, n_steps)
        sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        if iteration % 100 == 0:
            mse = loss.eval(feed_dict={X: X_batch, y: y_batch})
            print(iteration, "\tMSE:", mse)
    X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))
    y_pred = sess.run(outputs, feed_dict={X: X_new})
And this code works just fine (apart from throwing deprecation warnings left and right). I wanted to use the tf.keras API as suggested in the warnings. My code is the same except for:
cell = tf.keras.layers.SimpleRNNCell(units=n_neurons, activation=tf.nn.relu)
rnn_outputs = tf.keras.layers.RNN(cell,dtype=tf.float32, name="hidden1")(X)
But this yields the following exception:
InvalidArgumentError: Input to reshape is a tensor with 50 values, but the requested shape requires a multiple of 20
[[node Reshape_1 (defined at <ipython-input-9-879361be49dd>:3) ]]
So I understand that the problematic line is
outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])
After checking and comparing the documentation for both, https://www.tensorflow.org/api_docs/python/tf/nn/dynamic_rnn and https://www.tensorflow.org/api_docs/python/tf/keras/layers/RNN, I can't find the culprit.
What is the difference between these two cells? How do I use the tf.keras API with time series?
Full old code: https://github.com/ageron/handson-ml/blob/master/14_recurrent_neural_networks.ipynb
My full code:
import numpy as np
import tensorflow as tf
from datetime import datetime
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import pandas as pd
from utils import shuffle_batch, variable_summaries
import os
dir_path = os.getcwd()
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)
print(dir_path)
t_min, t_max = -5, 5
section_start = (t_max + t_min) / 2
resolution = 0.1
n_steps = 20
def time_series(t):
    return np.sin(t)

def next_batch(batch_size, n_steps):
    t0 = np.random.rand(batch_size, 1) * (t_max - t_min - n_steps * resolution)
    Ts = t0 + np.arange(0., n_steps + 1) * resolution
    ys = time_series(Ts)
    return ys[:, :-1].reshape(-1, n_steps, 1), ys[:, 1:].reshape(-1, n_steps, 1)
t = np.linspace(t_min, t_max, int((t_max - t_min) / resolution))
t_instance = np.linspace(start = section_start, stop = section_start + resolution * (n_steps + 1),num = n_steps + 1)
plt.figure(figsize=(11,4))
plt.subplot(121)
plt.title("A time series (generated)", fontsize=14)
plt.plot(t, time_series(t), label=r"original")
plt.plot(t_instance[:-1], time_series(t_instance[:-1]), "b-", linewidth=3, label="A training instance")
plt.legend(loc="lower left", fontsize=14)
#plt.axis([-10, 10, -17, 13])
plt.xlabel("Time")
plt.ylabel("Value")
plt.subplot(122)
plt.title("A training instance", fontsize=14)
plt.plot(t_instance[:-1], time_series(t_instance[:-1]), "bo", markersize=10, label="instance")
plt.plot(t_instance[1:], time_series(t_instance[1:]), "c*", markersize=10, label="target")
plt.legend(loc="upper left")
plt.xlabel("Time")
# In[6]:
n_steps = 20
n_inputs = 1
n_neurons = 100
n_outputs = 1
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])
# In[7]:
cell = tf.keras.layers.SimpleRNNCell(units=n_neurons, activation=tf.nn.relu)
rnn_outputs = tf.keras.layers.RNN(cell,dtype=tf.float32, name="hidden1")(X)
print(rnn_outputs.get_shape())
stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons], name='reshape1')
stacked_outputs = tf.keras.layers.Dense(n_outputs,name="hidden2")(stacked_rnn_outputs)
outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs], name='reshape2')
learning_rate = 0.001
loss = tf.reduce_mean(tf.square(outputs - y)) # MSE
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_iterations = 1500
batch_size = 50
save_path =os.path.join(dir_path,"model","recurrent_sinus_model")
with tf.Session() as sess:
    init.run()
    for iteration in range(n_iterations):
        X_batch, y_batch = next_batch(batch_size, n_steps)
        sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        if iteration % 100 == 0:
            mse = loss.eval(feed_dict={X: X_batch, y: y_batch})
            print(iteration, "\tMSE:", mse)
    saver.save(sess, save_path)

with tf.Session() as sess:
    saver.restore(sess, save_path)
    X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))
    y_pred = sess.run(outputs, feed_dict={X: X_new})
plt.title("Testing the model", fontsize=14)
plt.plot(t_instance[:-1], time_series(t_instance[:-1]), "bo", markersize=10, label="instance")
plt.plot(t_instance[1:], time_series(t_instance[1:]), "w*", markersize=10, label="target")
plt.plot(t_instance[1:], y_pred[0,:,0], "r.", markersize=10, label="prediction")
plt.legend(loc="upper left")
plt.xlabel("Time")
plt.show()
# In[ ]:
with tf.Session() as sess:
    saver.restore(sess, save_path)
    X_new = time_series(np.array(t.reshape(-1, n_steps, n_inputs)))
    y_pred = sess.run(outputs, feed_dict={X: X_new})
plt.title("A time series (generated)", fontsize=14)
plt.plot(t, time_series(t), label=r"original",linewidth=5,c='r')
plt.plot(t[:-1], time_series(t[:-1]), "b-", linewidth=3, label="A training instance")
plt.legend(loc="lower left", fontsize=14)
plt.xlabel("Time")
plt.ylabel("Value")
So the answer is:
rnn_outputs, rnn_states = tf.keras.layers.RNN(cell,dtype=tf.float32, name="hidden1", return_state=True, return_sequences=True)(X)
instead of
rnn_outputs = tf.keras.layers.RNN(cell,dtype=tf.float32, name="hidden1")(X)
The parameter return_sequences=True makes the RNN return its output at every time step (the whole sequence) instead of only the last one, and that is exactly the point here.
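As a quick illustration of that difference (a minimal sketch assuming TF 1.x with tf.keras; the cells and shapes here are standalone, not the question's variables), the default RNN layer returns only the last output, while return_sequences=True returns one output per step:
import tensorflow as tf

n_steps, n_inputs, n_neurons = 20, 1, 100
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])

cell_a = tf.keras.layers.SimpleRNNCell(units=n_neurons)
cell_b = tf.keras.layers.SimpleRNNCell(units=n_neurons)

last_only = tf.keras.layers.RNN(cell_a)(X)                          # shape (?, 100)
full_seq = tf.keras.layers.RNN(cell_b, return_sequences=True)(X)    # shape (?, 20, 100)

print(last_only.get_shape())  # only the final step -> reshaping to [-1, n_steps, n_outputs] later fails
print(full_seq.get_shape())   # one output per step -> matches what tf.nn.dynamic_rnn returned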

Implementing the TFLearn IMDB LSTM example in TensorFlow

I'm implementing tflearn's LSTM IMDB example in plain TensorFlow.
I used the same dataset, architecture, and hyper-parameters (embedding size, maximum sentence length, and so on) as the tflearn model, but my model's performance is worse than the tflearn example (after 10 epochs, my model gets about 52% accuracy while the example gets near 80%).
I'd appreciate any advice on how to reach the performance of the example.
Below is my code:
import tensorflow as tf
from tflearn.data_utils import to_categorical, pad_sequences
from tflearn.datasets import imdb
from tensorflow.contrib.rnn import BasicLSTMCell
import time
n_class = 2
n_words = 10000
EMBEDDING_SIZE = 128
HIDDEN_SIZE = 128
MAX_LENGTH = 100
lr = 1e-3
epoch = 10
TRAIN_SIZE = 22500
validation_size = 2500
batch_size = 128
KP = 0.8
# IMDB Dataset loading
train, test, _ = imdb.load_data(path='imdb.pkl', n_words=n_words,
                                valid_portion=0.1, sort_by_len=False)
trainX, trainY = train
validationX, validationY = test
testX, testY = _
# Data preprocessing
# Sequence padding
trainX = pad_sequences(trainX, maxlen=MAX_LENGTH, value=0.)
validationX = pad_sequences(validationX, maxlen=MAX_LENGTH, value=0.)
testX = pad_sequences(testX, maxlen=MAX_LENGTH, value=0.)
# Converting labels to binary vectors
trainY = to_categorical(trainY, n_class)
validationY = to_categorical(validationY, n_class)
testY = to_categorical(testY, n_class)
graph = tf.Graph()
with graph.as_default():
    # input
    text = tf.placeholder(tf.int32, [None, MAX_LENGTH])
    labels = tf.placeholder(tf.float32, [None, n_class])
    keep_prob = tf.placeholder(tf.float32)
    embeddings_var = tf.Variable(tf.truncated_normal([n_words, EMBEDDING_SIZE]), trainable=True)
    text_embedded = tf.nn.embedding_lookup(embeddings_var, text)
    print(text_embedded.shape)  # [batch_size, length, embedding_size]
    word_list = tf.unstack(text_embedded, axis=1)
    cell = BasicLSTMCell(HIDDEN_SIZE)
    dropout_cell = tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob, output_keep_prob=keep_prob)
    outputs, encoding = tf.nn.static_rnn(dropout_cell, word_list, dtype=tf.float32)
    logits = tf.layers.dense(outputs[-1], n_class, activation=None)
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))
    optimizer = tf.train.AdamOptimizer(lr).minimize(loss)
    prediction = tf.argmax(logits, 1)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(prediction, tf.argmax(labels, 1)), tf.float32))
train_steps = epoch * TRAIN_SIZE // batch_size + 1
print("Train steps: ", train_steps)
with tf.Session(graph=graph) as sess:
    tf.global_variables_initializer().run()
    print("Initialized!")
    s = time.time()
    offset = 0
    for step in range(train_steps):
        offset = (offset * step) % (TRAIN_SIZE - batch_size)
        batch_text = trainX[offset: offset + batch_size, :]
        batch_label = trainY[offset: offset + batch_size, :]
        fd = {text: batch_text, labels: batch_label, keep_prob: KP}
        _, l, acc = sess.run([optimizer, loss, accuracy], feed_dict=fd)
        if step % 100 == 0:
            print("Step: %d loss: %f accuracy: %f" % (step, l, acc))
        if step % 500 == 0:
            v_l, v_acc = sess.run([loss, accuracy], feed_dict={
                text: validationX,
                labels: validationY,
                keep_prob: 1.0
            })
            print("------------------------------------------------")
            print("Validation: step: %d loss: %f accuracy: %f" % (step, v_l, v_acc))
            print("------------------------------------------------")
    print("Training finished, time consumed:", time.time() - s, " s")
    print("Test accuracy: %f" % accuracy.eval(feed_dict={
        text: testX,
        labels: testY,
        keep_prob: 1.0
    }))
Sorry, I made a stupid mistake!
The loss:
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))
is supposed to be
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
Then the accuracy matches the tflearn example.
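For anyone comparing the two, here is a minimal sketch (assuming TF 1.x; the logits and labels are made-up values, not taken from the model above) showing that the two losses score the same 2-class, one-hot prediction differently:
import tensorflow as tf

logits = tf.constant([[2.0, -1.0]])   # model strongly prefers class 0
labels = tf.constant([[1.0, 0.0]])    # one-hot ground truth: class 0

# sigmoid_* scores each logit as an independent binary decision;
# softmax_* normalizes both logits into a single 2-way distribution.
sig = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))
soft = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))

with tf.Session() as sess:
    print(sess.run([sig, soft]))  # two different loss values for the same prediction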

tf.layers.batch_normalization freezes during sess.run() (1.5.0-dev20171031)

The graph-building phase passes without error, but the program freezes (no hard-drive reads, no memory change, nothing) during sess.run() on the first mini-batch of the first epoch. If I remove this layer or replace it with tf.contrib.layers.layer_norm, the program runs without issues.
The tensor (x) I pass into tf.layers.batch_normalization has the shape [#batches, 200]. I use mostly default values, but turned off center and scale.
x_BN = tf.layers.batch_normalization(
    x,
    axis=-1,
    momentum=0.99,
    epsilon=1e-10,  # 0.001,
    center=False,   # True,
    scale=False,    # True,
    beta_initializer=tf.zeros_initializer(),
    gamma_initializer=tf.ones_initializer(),
    moving_mean_initializer=tf.zeros_initializer(),
    moving_variance_initializer=tf.ones_initializer(),
    beta_regularizer=None,
    gamma_regularizer=None,
    beta_constraint=None,
    gamma_constraint=None,
    training=Flg_training,  # False,
    trainable=True,
    name=None,
    reuse=None,
    renorm=False,
    renorm_clipping=None,
    renorm_momentum=0.99,
    fused=False,
    virtual_batch_size=None,
    adjustment=None
)
The TensorFlow version I'm using is tf-nightly-gpu (1.5.0-dev20171031 or 1.5.0-dev20171023). Has anyone encountered a similar problem?
Update
This happens when the input of tf.layers.batch_normalization comes from tf.nn.bidirectional_dynamic_rnn; please see the simplified code below to reproduce the issue:
import tensorflow as tf
import numpy as np
starter_learning_rate = 0.001
decay_steps = 100
decay_rate = 0.96
num_RNN_layers = 3
LSTM_CELL_SIZE = 100
keep_prob = 0.95
with tf.name_scope('Inputs'):
    x = tf.placeholder(dtype=tf.float32, shape=[None, 200])
    y = tf.placeholder(dtype=tf.float32, shape=[None, 200])
    length = tf.placeholder(dtype=tf.int32, shape=[None])
    Flg_training = tf.placeholder(dtype=tf.bool, shape=[])
    x_1 = tf.expand_dims(x, -1)
with tf.name_scope('BiLSTM'):
    dropcells = []
    for iiLyr in list(range(num_RNN_layers)):
        cell_iiLyr = tf.nn.rnn_cell.LSTMCell(num_units=LSTM_CELL_SIZE, state_is_tuple=True)
        dropcells.append(tf.nn.rnn_cell.DropoutWrapper(cell=cell_iiLyr, output_keep_prob=keep_prob))  # input_keep_prob=self.keep_prob, input_keep_prob=1.0, seed=None
    MultiLyr_cell = tf.nn.rnn_cell.MultiRNNCell(cells=dropcells, state_is_tuple=True)
    outputs, states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=MultiLyr_cell,
        cell_bw=MultiLyr_cell,
        dtype=tf.float32,
        sequence_length=length,  # tf_b_lens
        inputs=x_1,  # stacked_RefPts_desc, #tf_b_VCCs_AMs_BN1
        scope="BiLSTM"
    )
    # output_fw, output_bw = outputs
    states_fw, states_bw = states
    c_fw_lstLyr, h_fw_lstLyr = states_fw[-1]
    c_bw_lstLyr, h_bw_lstLyr = states_bw[-1]
    states_concat1 = tf.concat([h_fw_lstLyr, h_bw_lstLyr], axis=1, name='states_concat')
with tf.name_scope("cs_BN1"):
    x_BN = tf.layers.batch_normalization(
        states_concat1,
        axis=-1,  # axis that should be normalized (typically the features axis, here the concatenated hidden states)
        momentum=0.99,
        epsilon=1e-10,  # 0.001,
        center=False,   # True,
        scale=False,    # True,
        beta_initializer=tf.zeros_initializer(),
        gamma_initializer=tf.ones_initializer(),
        moving_mean_initializer=tf.zeros_initializer(),
        moving_variance_initializer=tf.ones_initializer(),
        beta_regularizer=None,
        gamma_regularizer=None,
        beta_constraint=None,
        gamma_constraint=None,
        training=Flg_training,  # False,
        trainable=True,
        name="test_BN",  # None,
        reuse=None,
        renorm=False,
        renorm_clipping=None,
        renorm_momentum=0.99,
        fused=False,
        virtual_batch_size=None,
        adjustment=None
    )
with tf.name_scope("Regression"):
    a = tf.get_variable("a", shape=[1], dtype=tf.float32, initializer=tf.constant_initializer(1.0))
    b = tf.get_variable("b", shape=[1], dtype=tf.float32, initializer=tf.constant_initializer(0.0))

with tf.name_scope("Prediction"):
    y_pred = tf.multiply(x_BN, a) + b

with tf.name_scope('Loss'):
    losses = tf.losses.mean_squared_error(y, y_pred, reduction=tf.losses.Reduction.NONE)
    mean_loss = tf.reduce_mean(losses)
with tf.name_scope('Training'):
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                               decay_steps, decay_rate, staircase=True)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_step = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(losses, global_step=global_step)
#x_mean = tf.reduce_mean(x_BN, axis=0)
sess = tf.InteractiveSession()
train_writer = tf.summary.FileWriter("G:\\Surface_Ozone\\Temp\\", sess.graph)
sess.run(tf.global_variables_initializer())
for ii in list(range(2000)):
    x_in = np.random.rand(20, 200)
    y_in = x_in * 1.5 + 3.0
    length_in = np.full([20], 200, dtype=np.int32)
    _, mean_loss_val, a_val, b_val = sess.run([train_step, mean_loss, a, b], feed_dict={
        x: x_in,
        Flg_training: True,
        y: y_in,
        length: length_in
    })
    if ii < 50:
        print("step {}: {} | a: {} | b: {}".format(ii, mean_loss_val, a_val, b_val))
    else:
        if ii % 100 == 0:
            print("step {}: {} | a: {} | b: {}".format(ii, mean_loss_val, a_val, b_val))
print("Normal End.")

TensorFlow Python TypeError: help me understand what's wrong

Hello, fellow TensorFlow specialists!
I have the following code, mainly copied from an example. I have 38 inputs and 4 outputs in my NN. I want to train the NN and make predictions, but when I run the program I get an error saying:
b = tf.Variable(tf.float32, [None, n_class])
TypeError: Expected binary or unicode string, got tf.float32
together with:
TypeError: Failed to convert object of type to Tensor. Contents: . Consider casting elements to a supported type.
import csv
import matplotlib as plt
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
with open('input.csv', "r") as myfile:
    reader = csv.reader(myfile, delimiter=",")
    data = list(reader)
data = np.asarray(data)
x = data[:,0:39]
y = data[:,39:43]
train_x, test_x, train_y, test_y = train_test_split(x,y, test_size=0.05)
learning_rate = 0.3
training_epochs = 1000
cost_history = np.empty(shape=[1], dtype=float)
n_dim = x.shape[1]
print("n_dim =" , n_dim)
n_class = 2
# model_path =
n_hidden_1 = 60
n_hidden_2 = 60
n_hidden_3 = 60
X = tf.placeholder(tf.float32, [None, n_dim])
W = tf.Variable(tf.zeros([n_dim, n_class]))
b = tf.Variable(tf.float32, [None, n_class])
y_ = tf.placeholder(tf.float32, [None, n_class])
def multilayer_perceptron(x, weights, biases):
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.sigmoid(layer_1)
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.sigmoid(layer_2)
    layer_3 = tf.add(tf.matmul(layer_2, weights['h3']), biases['b3'])
    layer_3 = tf.nn.relu(layer_3)
    out_layer = tf.matmul(layer_3, weights['out']) + biases['out']
    return out_layer
weights = {
    'h1': tf.Variable(tf.truncated_normal([n_dim, n_hidden_1])),
    'h2': tf.Variable(tf.truncated_normal([n_hidden_1, n_hidden_2])),
    'h3': tf.Variable(tf.truncated_normal([n_hidden_2, n_hidden_3])),
    'out': tf.Variable(tf.truncated_normal([n_hidden_3, n_class]))
}
biases = {
    'b1': tf.Variable(tf.truncated_normal([n_hidden_1])),
    'b2': tf.Variable(tf.truncated_normal([n_hidden_2])),
    'b3': tf.Variable(tf.truncated_normal([n_hidden_3])),
    'out': tf.Variable(tf.truncated_normal([n_class]))
}
init = tf.global_variables_initializer()
saver = tf.train.Saver()
cost_func = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))
training_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_func)
sess = tf.Session()
sess.run(init)
mse_history = []
accuracy_history = []
for epoch in range(training_epochs):
    sess.run(training_step, feed_dict={x: train_x, y_: train_y})
    cost = sess.run(cost_func, feed_dict={x: train_x, y_: train_y})
    cost_history = np.append(cost_history, cost)
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    pred_y = sess.run(y, feed_dict={x: test_x})
    mse = tf.reduce_mean(tf.square(pred_y - test_y))
    mse_ = sess.run(mse)
    mse_history.append(mse_)
    accuracy = (sess.run(accuracy, feed_dict={x: train_x, y_: train_y}))
    accuracy_history.append(accuracy)
    print('epoch: ', epoch, ' - cost: ', cost, " - MSE: ", mse_, " - Train Accuracy: ", accuracy)
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print("Test Accuracy: ", (sess.run(accuracy, feed_dict={x: test_x, y_: test_y})))
pred_y = sess.run(y, feed_dict={x: test_x})
mse = tf.reduce_mean(tf.square((pred_y - test_y)))
print ("MSE %.4f" % sess.run(mse))
Do you have any suggestions for what to do next?
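As a hint for reading the error, here is a minimal sketch (not the full fix for the program above) of the distinction the TypeError points at: tf.Variable expects an initial value, while the commented-out form passes a dtype where the value should be.
import tensorflow as tf

n_class = 2
# b = tf.Variable(tf.float32, [None, n_class])  # dtype passed as the initial value -> TypeError
b = tf.Variable(tf.zeros([n_class]))            # an actual initial tensor with a concrete shape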

TensorFlow: why does my logistic cost not change?

I am new to TensorFlow and I want to do a classification job with self-defined layers. I have built a model which concatenates the outputs of several previous DNN layers into one big tensor and then does logistic binary classification, like this (but without the RNN):
Model Structure
This is my code:
import tensorflow as tf
import numpy as np
import pandas as pd
from load_data import load_data_from_file
TRAIN_FILE = 'train.csv'
TEST_FILE = 'test.csv'
X_train, y_train = load_data_from_file(TRAIN_FILE)
y_train = y_train[np.newaxis]
y_train = y_train.transpose()
n_x_dnn = X_train.shape[1]
n_hidden_dnn = 50
n_merge_dnn1 = 5 * n_hidden_dnn
n_dnn6 = 50
hm_epochs = 10
batch_size = 50
X = tf.placeholder(tf.float32, [None, n_x_dnn])
y = tf.placeholder(tf.float32, [None, 1])
weights = {
    'dnn1': tf.Variable(tf.random_normal([n_x_dnn, n_hidden_dnn])),
    'dnn2': tf.Variable(tf.random_normal([n_hidden_dnn, n_hidden_dnn])),
    'dnn3': tf.Variable(tf.random_normal([n_hidden_dnn, n_hidden_dnn])),
    'dnn4': tf.Variable(tf.random_normal([n_hidden_dnn, n_hidden_dnn])),
    'dnn5': tf.Variable(tf.random_normal([n_hidden_dnn, n_hidden_dnn])),
    'dnn_merge': tf.Variable(tf.random_normal([n_merge_dnn1, n_dnn6])),
    'dnn6': tf.Variable(tf.random_normal([n_dnn6, 1]))
}
biases = {
    'dnn1_b': tf.Variable(tf.random_normal([n_hidden_dnn])),
    'dnn2_b': tf.Variable(tf.random_normal([n_hidden_dnn])),
    'dnn3_b': tf.Variable(tf.random_normal([n_hidden_dnn])),
    'dnn4_b': tf.Variable(tf.random_normal([n_hidden_dnn])),
    'dnn5_b': tf.Variable(tf.random_normal([n_hidden_dnn])),
    'dnn_merge_b': tf.Variable(tf.random_normal([n_dnn6])),
    'dnn6_b': tf.Variable(tf.random_normal([1])),
}
def define_layers():
    # dnn layer 1
    dnn_layer1 = tf.add(tf.matmul(X, weights['dnn1']), biases['dnn1_b'])
    dnn_layer1 = tf.nn.relu(dnn_layer1)
    # dnn layer 2
    dnn_layer2 = tf.add(tf.matmul(dnn_layer1, weights['dnn2']), biases['dnn2_b'])
    dnn_layer2 = tf.nn.relu(dnn_layer2)
    # dnn layer 3
    dnn_layer3 = tf.add(tf.matmul(dnn_layer2, weights['dnn3']), biases['dnn3_b'])
    dnn_layer3 = tf.nn.relu(dnn_layer3)
    # dnn layer 4
    dnn_layer4 = tf.add(tf.matmul(dnn_layer3, weights['dnn4']), biases['dnn4_b'])
    dnn_layer4 = tf.nn.relu(dnn_layer4)
    # dnn layer 5
    dnn_layer5 = tf.add(tf.matmul(dnn_layer4, weights['dnn5']), biases['dnn5_b'])
    dnn_layer5 = tf.nn.relu(dnn_layer5)
    # merge layer
    merged = tf.concat([dnn_layer1, dnn_layer2, dnn_layer3, dnn_layer4, dnn_layer5], 1)
    dnn_merge = tf.add(tf.matmul(merged, weights['dnn_merge']), biases['dnn_merge_b'])
    dnn_merge = tf.nn.relu(dnn_merge)
    # dnn layer 6
    dnn_layer6 = tf.add(tf.matmul(dnn_merge, weights['dnn6']), biases['dnn6_b'])
    dnn_layer6 = tf.nn.sigmoid(dnn_layer6)
    return dnn_layer6
logits = define_layers()
cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=y))
optimizer = tf.train.AdamOptimizer().minimize(cost)
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(hm_epochs):
        # epoch_loss = 0
        i = 0
        while i < len(X_train):
            start = i
            end = i + batch_size
            batch_x = np.array(X_train[start:end])
            batch_y = np.array(y_train[start:end])
            _, c, l = sess.run([optimizer, cost, logits], feed_dict={X: batch_x,
                                                                     y: batch_y})
            # epoch_loss += c
            i += batch_size
            # print(c, l)
        print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:', c, 'logits:', l)
    save_path = saver.save(sess, "model/NF_dnn_and_rnn_model.ckpt")
    print("Model saved in file: %s" % save_path)
    X_test, y_test = load_data_from_file(TEST_FILE)
    predict = sess.run(logits, feed_dict={X: X_test})
    print(predict)
And the output shows that my cost does not change:
Epoch 7 completed out of 10 loss: 1.27325 logits: [[ 1.]
Epoch 8 completed out of 10 loss: 1.27325 logits: [[ 1.]
Epoch 9 completed out of 10 loss: 1.27325 logits: [[ 1.]
You should remove the sigmoid activation from your last layer, dnn_layer6, because sigmoid_cross_entropy_with_logits already applies a sigmoid internally (so your model currently applies it twice).
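For illustration, here is a minimal, self-contained sketch of that pattern (the shapes and variable names are placeholders, not taken from the model above): the last layer returns raw logits, the loss applies the sigmoid internally, and tf.nn.sigmoid is used only when probabilities are needed for prediction.
import tensorflow as tf

X = tf.placeholder(tf.float32, [None, 50])
y = tf.placeholder(tf.float32, [None, 1])
W = tf.Variable(tf.random_normal([50, 1]))
b = tf.Variable(tf.zeros([1]))

logits = tf.matmul(X, W) + b            # raw scores: no sigmoid here
cost = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=y))
optimizer = tf.train.AdamOptimizer().minimize(cost)
probabilities = tf.nn.sigmoid(logits)   # apply sigmoid only when predicting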