Recurrent neural network, time series prediction with newer Tensorflow 1.14 - tensorflow

How to use new tf.keras API with recurrent neural network? I have checked the documentation but there is no example of such a situation.
There is a great book from 2017, Hands-On Machine Learning. The TensorFlow API has evolved since then, and I am trying to rewrite the book's recurrent neural network for time series prediction using the version 1.14 API.
The code from the book is using older tf.nn.dynamic_rnn and tf.nn.rnn_cell.BasicRNNCell:
n_steps = 20
n_inputs = 1
n_neurons = 100
n_outputs = 1
learning_rate = 0.001
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])
cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)
rnn_outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])
stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)
outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])
loss = tf.reduce_mean(tf.square(outputs - y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_iterations = 500
batch_size = 50
# Train on n_iterations random mini-batches, printing the batch MSE every
# 100 iterations, then predict on the sample instance in the same session.
with tf.Session() as sess:
    sess.run(init)
    for iteration in range(n_iterations):
        X_batch, y_batch = next_batch(batch_size, n_steps)
        batch_feed = {X: X_batch, y: y_batch}
        sess.run(training_op, feed_dict=batch_feed)
        if iteration % 100 == 0:
            mse = sess.run(loss, feed_dict=batch_feed)
            print(iteration, "\tMSE:", mse)

    X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))
    y_pred = sess.run(outputs, feed_dict={X: X_new})
And this code works just fine (except that it throws warnings about deprecation left and right). I wanted to use tf.keras API as suggested in warning. My code is the same except:
cell = tf.keras.layers.SimpleRNNCell(units=n_neurons, activation=tf.nn.relu)
rnn_outputs = tf.keras.layers.RNN(cell,dtype=tf.float32, name="hidden1")(X)
But this yields following exception:
InvalidArgumentError: Input to reshape is a tensor with 50 values, but the requested shape requires a multiple of 20
[[node Reshape_1 (defined at <ipython-input-9-879361be49dd>:3) ]]
so I understand that the problematic line is
outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])
After checking and comparing documentation for both cells https://www.tensorflow.org/api_docs/python/tf/nn/dynamic_rnn and
https://www.tensorflow.org/api_docs/python/tf/keras/layers/RNN I can't find the culprit.
What is the difference with these two cells? How to use tf.keras API with time series?
Full old code: https://github.com/ageron/handson-ml/blob/master/14_recurrent_neural_networks.ipynb
Full "my" code:
import numpy as np
import tensorflow as tf
from datetime import datetime
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import pandas as pd
from utils import shuffle_batch, variable_summaries
import os
dir_path = os.getcwd()
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)
print(dir_path)
# Time range of the generated series, the midpoint used as the start of the
# sample instance, the sampling step, and the window length in steps.
t_min, t_max = -5, 5
section_start = (t_max + t_min) / 2
resolution = 0.1
n_steps = 20


def time_series(t):
    """Return the generated series (a sine wave) evaluated at time(s) ``t``."""
    return np.sin(t)


def next_batch(batch_size, n_steps):
    """Sample ``batch_size`` random windows of the series.

    Each window holds ``n_steps + 1`` consecutive points spaced ``resolution``
    apart. The first ``n_steps`` points form the input and the last
    ``n_steps`` points (the input shifted by one step) form the target.

    Returns:
        A tuple ``(inputs, targets)``, both of shape
        ``(batch_size, n_steps, 1)``.
    """
    # Offset by t_min so every window lies inside [t_min, t_max]; without the
    # offset the start times are drawn from [0, t_max - t_min - window) and
    # only match the intended range when t_min happens to be 0.
    t0 = t_min + np.random.rand(batch_size, 1) * (t_max - t_min - n_steps * resolution)
    Ts = t0 + np.arange(0., n_steps + 1) * resolution
    ys = time_series(Ts)
    return ys[:, :-1].reshape(-1, n_steps, 1), ys[:, 1:].reshape(-1, n_steps, 1)
t = np.linspace(t_min, t_max, int((t_max - t_min) / resolution))
t_instance = np.linspace(start = section_start, stop = section_start + resolution * (n_steps + 1),num = n_steps + 1)
plt.figure(figsize=(11,4))
plt.subplot(121)
plt.title("A time series (generated)", fontsize=14)
plt.plot(t, time_series(t), label=r"original")
plt.plot(t_instance[:-1], time_series(t_instance[:-1]), "b-", linewidth=3, label="A training instance")
plt.legend(loc="lower left", fontsize=14)
#plt.axis([-10, 10, -17, 13])
plt.xlabel("Time")
plt.ylabel("Value")
plt.subplot(122)
plt.title("A training instance", fontsize=14)
plt.plot(t_instance[:-1], time_series(t_instance[:-1]), "bo", markersize=10, label="instance")
plt.plot(t_instance[1:], time_series(t_instance[1:]), "c*", markersize=10, label="target")
plt.legend(loc="upper left")
plt.xlabel("Time")
# In[6]:
n_steps = 20
n_inputs = 1
n_neurons = 100
n_outputs = 1
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])
# In[7]:
cell = tf.keras.layers.SimpleRNNCell(units=n_neurons, activation=tf.nn.relu)
rnn_outputs = tf.keras.layers.RNN(cell,dtype=tf.float32, name="hidden1")(X)
print(rnn_outputs.get_shape())
stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons], name='reshape1')
stacked_outputs = tf.keras.layers.Dense(n_outputs,name="hidden2")(stacked_rnn_outputs)
outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs], name='reshape2')
learning_rate = 0.001
loss = tf.reduce_mean(tf.square(outputs - y)) # MSE
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_iterations = 1500
batch_size = 50
save_path =os.path.join(dir_path,"model","recurrent_sinus_model")
# Train on n_iterations random mini-batches, printing the batch MSE every
# 100 iterations, then write a checkpoint to save_path.
with tf.Session() as sess:
    sess.run(init)
    for iteration in range(n_iterations):
        X_batch, y_batch = next_batch(batch_size, n_steps)
        batch_feed = {X: X_batch, y: y_batch}
        sess.run(training_op, feed_dict=batch_feed)
        if iteration % 100 == 0:
            mse = sess.run(loss, feed_dict=batch_feed)
            print(iteration, "\tMSE:", mse)
    saver.save(sess, save_path)
with tf.Session() as sess:
saver.restore(sess, save_path)
X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))
y_pred = sess.run(outputs, feed_dict={X: X_new})
plt.title("Testing the model", fontsize=14)
plt.plot(t_instance[:-1], time_series(t_instance[:-1]), "bo", markersize=10, label="instance")
plt.plot(t_instance[1:], time_series(t_instance[1:]), "w*", markersize=10, label="target")
plt.plot(t_instance[1:], y_pred[0,:,0], "r.", markersize=10, label="prediction")
plt.legend(loc="upper left")
plt.xlabel("Time")
plt.show()
# In[ ]:
with tf.Session() as sess:
saver.restore(sess, save_path)
X_new = time_series(np.array(t.reshape(-1, n_steps, n_inputs)))
y_pred = sess.run(outputs, feed_dict={X: X_new})
plt.title("A time series (generated)", fontsize=14)
plt.plot(t, time_series(t), label=r"original",linewidth=5,c='r')
plt.plot(t[:-1], time_series(t[:-1]), "b-", linewidth=3, label="A training instance")
plt.legend(loc="lower left", fontsize=14)
plt.xlabel("Time")
plt.ylabel("Value")

So the answer is:
rnn_outputs, rnn_states = tf.keras.layers.RNN(cell,dtype=tf.float32, name="hidden1", return_state=True, return_sequences=True)(X)
instead of
rnn_outputs = tf.keras.layers.RNN(cell,dtype=tf.float32, name="hidden1")(X)
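The parameter return_sequences=True makes the RNN return its output at every time step, i.e. a tensor of shape [batch, n_steps, n_neurons], instead of only the output of the last step, [batch, n_neurons]. Without it, the dense layer produced just 50 values for a batch of 50, which cannot be reshaped to [-1, n_steps, n_outputs] - hence the error above.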

Related

How to print out prediction value in tensorflow

I am new to tensorflow and I am a slow learner. After successfully compiling the model and getting the accuracy, I want to print the predicted values, but I don't know how to do it.
My dataset has multivariate features with only one output. The output contains only 1, 0 and -1, so I one-hot encoded it. I finished compiling the model and searched online for how to compute predictions in tensorflow, but I didn't find a good solution for my case.
The precisionCalculate function computes the precision for each column on the test data, since train_y and test_y become [1,0,0], [0,1,0], [0,0,1] after one-hot encoding.
I have tried
y_pred = sess.run(tf.argmax(y, 1), feed_dict={X: test_x, y: test_y})
but it turns out y_pred is exactly the same as my test_y
Here is my full code example.
import tensorflow as tf
import pandas as pd
import numpy as np
import tensorflow.contrib.rnn
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, LabelEncoder
import pdb
np.set_printoptions(threshold=np.inf)
def precisionCalculate(pred_y, test_y):
    """Return the precision for class index 0 and for class index 2.

    A prediction counts as correct for class 0 when ``pred_y + test_y == 0``
    and for class 2 when the sum is 4. This assumes both arrays hold class
    indices in {0, 1, 2}, so those sums occur only when both entries agree.

    Args:
        pred_y: array of predicted class indices.
        test_y: array of true class indices, same shape as ``pred_y``.

    Returns:
        ``(precision1, precision3)``: the fraction of class-0 predictions that
        are correct and the fraction of class-2 predictions that are correct.
        A precision is reported as 0.0 when that class was never predicted,
        instead of raising ZeroDivisionError.
    """
    count = pred_y + test_y
    true_zero = np.count_nonzero(count == 0)
    true_two = np.count_nonzero(count == 4)
    predicted_zero = np.count_nonzero(pred_y == 0)
    predicted_two = np.count_nonzero(pred_y == 2)
    precision1 = true_zero / predicted_zero if predicted_zero else 0.0
    precision3 = true_two / predicted_two if predicted_two else 0.0
    # The leftover pdb.set_trace() breakpoint was removed: it halted every
    # call and made the function unusable in a non-interactive run.
    return precision1, precision3
df = pd.read_csv('new_df.csv', skiprows=[0], header=None)
df.drop(columns=[0,1], inplace=True)
df.columns = [np.arange(0, df.shape[1])]
df[0] = df[0].shift(-1)
#parameters
time_steps = 1
inputs = df.shape[1]
outputs = 3
#remove nan as a result of shift values
df = df.iloc[:-1, :]
#convert to numpy
df = df.values
train_number = 30276 #start date from 1018
train_x = df[: train_number, 1:]
test_x = df[train_number:, 1:]
train_y = df[:train_number, 0]
test_y = df[train_number:, 0]
#data pre-processing
#x y split
# Scale features to [0, 1]. The scaler is fitted on the training features
# only and then reused for the test features, so both sets share the same
# min/max statistics (re-fitting on the test set would scale it differently).
scaler = MinMaxScaler(feature_range=(0,1))
train_x = scaler.fit_transform(train_x)
test_x = scaler.transform(test_x)
#reshape into 3d array
train_x = train_x[:, None, :]
test_x = test_x[:, None, :]
# One-hot encode the outputs.
onehot_encoder = OneHotEncoder()
#encoder = LabelEncoder()
# Shift each label array by its own maximum and flip the sign, so the labels
# become non-negative values counted down from the maximum.
max_ = train_y.max()
max2 = test_y.max()
train_y = (train_y - max_) * (-1)
test_y = (test_y - max2) * (-1)
# The encoder expects a 2-D column of categories.
encode_categorical = train_y.reshape(len(train_y), 1)
encode_categorical2 = test_y.reshape(len(test_y), 1)
# Fit the categories on the training labels only and reuse them for the test
# labels, so both arrays have the same columns in the same order even if the
# test split is missing a class.
train_y = onehot_encoder.fit_transform(encode_categorical).toarray()
test_y = onehot_encoder.transform(encode_categorical2).toarray()
print(train_x.shape, train_y.shape, test_x.shape, test_y.shape)
#model parameters
learning_rate = 0.001
epochs = 100
batch_size = int(train_x.shape[0]/10)
length = train_x.shape[0]
display = 100
neurons = 100
tf.reset_default_graph()
X = tf.placeholder(tf.float32, [None, time_steps, 90],name='x')
y = tf.placeholder(tf.float32, [None, outputs],name='y')
#LSTM cell
cell = tf.contrib.rnn.BasicLSTMCell(num_units = neurons, activation = tf.nn.relu)
cell_outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
# pass into Dense layer
stacked_outputs = tf.reshape(cell_outputs, [-1, neurons])
out = tf.layers.dense(inputs=stacked_outputs, units=outputs)
# squared error loss or cost function for linear regression
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=out, labels=y))
# optimizer to minimize cost
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
# Streaming metrics; each tf.metrics.* call returns a (value, update_op)
# pair, and index [1] below selects the update op.
accuracy = tf.metrics.accuracy(labels = tf.argmax(y, 1), predictions = tf.argmax(out, 1), name = "accuracy")
precision = tf.metrics.precision(labels=tf.argmax(y, 1), predictions=tf.argmax(out, 1), name="precision")
recall = tf.metrics.recall(labels=tf.argmax(y, 1), predictions=tf.argmax(out, 1),name="recall")
# F1 is the harmonic mean of precision and recall; the numerator previously
# used accuracy in place of precision.
f1 = 2 * precision[1] * recall[1] / ( precision[1] + recall[1] )
with tf.Session() as sess:
# initialize all variables
tf.global_variables_initializer().run()
tf.local_variables_initializer().run()
# Train the model
for steps in range(epochs):
mini_batch = zip(range(0, length, batch_size), range(batch_size, length+1, batch_size))
epoch_loss = 0
i = 0
# train data in mini-batches
for (start, end) in mini_batch:
sess.run(training_op, feed_dict = {X: train_x[start:end,:,:], y: train_y[start:end,:]})
# print training performance
if (steps+1) % display == 0:
# evaluate loss function on training set
loss_fn = loss.eval(feed_dict = {X: train_x, y: train_y})
print('Step: {} \tTraining loss: {}'.format((steps+1), loss_fn))
# evaluate model accuracy
acc, prec, recall, f1 = sess.run([accuracy, precision, recall, f1],feed_dict = {X: test_x, y: test_y})
y_pred = sess.run(tf.argmax(y, 1), feed_dict={X: train_x, y: train_y})
test_y_alter = np.argmax(test_y, axis=1)
#print(test_y_alter)
print(precisionCalculate(y_pred, test_y_alter))
print(y_pred)
#prediction = y_pred.eval(feed_dict={X: train_x, y: test_y})
#print(prediction)
print('\nEvaluation on test set')
print('Accuracy:', acc[1])
print('Precision:', prec[1])
print('Recall:', recall[1])
print('F1 score:', f1)
I think you should use the output of your model instead of the label (y) in tf.argmax.
Here is my code in order to print prediction of the model:
# Wrap the predicted class indices in tf.Print so they are logged whenever
# pred_y is evaluated (the message string was missing its closing quote).
pred_y = tf.Print(tf.argmax(score, 1), [tf.argmax(score, 1)], message="prediction:")
pred_y.eval()
In the above code, score means the probability output of your model.

Combine tf.keras.layers with Tensorflow low level API

Can i combine tf.keras.layers with low level tensorflow?
The code below is not correct, but I want to do something like this: create placeholders that will later be fed with data (in tf.Session()), and feed that data to my model.
X, Y = create_placeholders(n_x, n_y)
output = create_model('channels_last')(X)
cost = compute_cost(output, Y)
Yes, it is the same as using tf.layers.dense(). Using tf.keras.layers.Dense() is actually the preferred way in the newest tensorflow version, 1.13 (tf.layers.dense() is deprecated). For example:
import tensorflow as tf
import numpy as np
# Toy data: two samples with two features each and an integer class label.
x_train = np.array([[-1.551, -1.469], [1.022, 1.664]], dtype=np.float32)
y_train = np.array([1, 0], dtype=int)

# Placeholders that are fed with the data at session run time.
x = tf.placeholder(tf.float32, shape=[None, 2])
y = tf.placeholder(tf.int32, shape=[None])

# Keras layers are called directly on the placeholder tensor.
with tf.name_scope('network'):
    layer1 = tf.keras.layers.Dense(2, input_shape=(2, ))
    layer2 = tf.keras.layers.Dense(2, input_shape=(2, ))
    fc1 = layer1(x)
    logits = layer2(fc1)

with tf.name_scope('loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss_fn = tf.reduce_mean(xentropy)

with tf.name_scope('optimizer'):
    optimizer = tf.train.GradientDescentOptimizer(0.01)
    train_op = optimizer.minimize(loss_fn)

# Evaluate the loss once, then take a single gradient step.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    train_feed = {x: x_train, y: y_train}
    loss_val = sess.run(loss_fn, feed_dict=train_feed)
    _ = sess.run(train_op, feed_dict=train_feed)

ValueError: Cannot feed value of shape (128, 28, 28) for Tensor 'Placeholder:0', which has shape '(?, 784)'

I am new to Tensorflow and Machine Learning and trying out CNN using Tensorflow with my custom input data. But I am getting the error attached below.
The Data or Image Size is 28x28 with 15 Labels.
I do not understand how the numpy reshape should be done in this script, or what the error means.
Help is highly appreciated.
import tensorflow as tf
import os
import skimage.data
import numpy as np
import random
def load_data(data_directory):
    """Load every ``.jpg`` found in the sub-directories of ``data_directory``.

    Each immediate sub-directory is treated as one label: its name is used
    as the label of every image read from it. A progress line is printed
    after each sub-directory has been processed.

    Returns:
        ``(images, labels)``: two parallel lists, the image read by
        ``skimage.data.imread`` and the name of the directory it came from.
    """
    images = []
    labels = []
    for entry in os.listdir(data_directory):
        label_directory = os.path.join(data_directory, entry)
        if not os.path.isdir(label_directory):
            continue
        for file_name in os.listdir(label_directory):
            if not file_name.endswith(".jpg"):
                continue
            image_path = os.path.join(label_directory, file_name)
            images.append(skimage.data.imread(image_path))
            labels.append(entry)
        print(str(entry)+' Completed')
    return images, labels
# Raw string so the Windows backslashes are taken literally; "\T" is not a
# valid escape sequence and triggers a warning in a normal string literal.
ROOT_PATH = r"H:\Testing\TrainingData"
train_data_directory = os.path.join(ROOT_PATH, "Training")
test_data_directory = os.path.join(ROOT_PATH, "Testing")
print('Loading Data...')
images, labels = load_data(train_data_directory)
print('Data has been Loaded')
n_classes = 15
training_examples = 10500
test_examples = 4500
batch_size = 128
x = tf.placeholder('float', [None, 784])
y = tf.placeholder('float')
def conv2d(x, W):
    """Convolve ``x`` with filter ``W`` using stride 1 and SAME padding."""
    unit_strides = [1,1,1,1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
def maxpool2d(x):
    """Max-pool ``x`` over 2x2 windows with stride 2 and SAME padding."""
    window = [1,2,2,1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')
def neural_network_model(x):
    """Build the CNN graph and return the unnormalised class scores.

    The input is reshaped to ``[-1, 28, 28, 1]``, passed through two
    convolution + max-pool stages (32 then 64 filters of size 5x5), flattened
    to ``7*7*64`` features, and fed through a 1024-unit ReLU layer and a
    linear output layer with ``n_classes`` units.

    Args:
        x: input tensor that can be reshaped to ``[-1, 28, 28, 1]``.

    Returns:
        Tensor of shape ``[batch, n_classes]`` holding the logits.
    """
    weights = {'W_Conv1':tf.Variable(tf.random_normal([5,5,1,32])),
               'W_Conv2':tf.Variable(tf.random_normal([5,5,32,64])),
               'W_FC':tf.Variable(tf.random_normal([7*7*64, 1024])),
               'Output':tf.Variable(tf.random_normal([1024, n_classes]))}
    biases = {'B_Conv1':tf.Variable(tf.random_normal([32])),
              'B_Conv2':tf.Variable(tf.random_normal([64])),
              'B_FC':tf.Variable(tf.random_normal([1024])),
              'Output':tf.Variable(tf.random_normal([n_classes]))}
    x = tf.reshape(x, shape=[-1,28,28,1])
    # The convolution biases were created but never applied; add them so the
    # B_Conv1 / B_Conv2 variables actually take part in the model.
    conv1 = conv2d(x, weights['W_Conv1']) + biases['B_Conv1']
    conv1 = maxpool2d(conv1)
    conv2 = conv2d(conv1, weights['W_Conv2']) + biases['B_Conv2']
    conv2 = maxpool2d(conv2)
    # Flatten the pooled feature maps for the fully connected layer.
    fc = tf.reshape(conv2, [-1, 7*7*64])
    fc = tf.nn.relu(tf.matmul(fc, weights['W_FC'])+biases['B_FC'])
    output = tf.matmul(fc, weights['Output'])+biases['Output']
    return output
def next_batch(num, data, labels):
    """Return up to ``num`` randomly chosen samples with their labels.

    The indices of ``data`` are shuffled and the first ``num`` of them are
    used to pick matching entries from ``data`` and ``labels``.

    Returns:
        ``(data_batch, label_batch)`` as two numpy arrays in the same order.
    """
    order = np.arange(0 , len(data))
    np.random.shuffle(order)
    chosen = order[:num]
    picked_data = np.asarray([data[k] for k in chosen])
    picked_labels = np.asarray([labels[k] for k in chosen])
    return picked_data, picked_labels
def train_neural_network(x):
    """Train the CNN on random mini-batches and print its accuracy.

    Builds the model on ``x``, minimises the softmax cross-entropy against
    the module-level placeholder ``y`` with Adam for 10 epochs, prints the
    summed loss after each epoch, and finally prints the accuracy computed
    on the module-level ``images`` / ``labels``.
    """
    prediction = neural_network_model(x)
    cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y) )
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    hm_epochs = 10
    batches_per_epoch = int(training_examples/batch_size)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(hm_epochs):
            epoch_loss = 0
            for _ in range(batches_per_epoch):
                epoch_x, epoch_y = next_batch(batch_size, images, labels)
                batch_feed = {x: epoch_x, y: epoch_y}
                # Run one optimisation step and keep only the cost value.
                batch_cost = sess.run([optimizer, cost], feed_dict=batch_feed)[1]
                epoch_loss += batch_cost
            print('Epoch', epoch, 'completed out of',hm_epochs,'loss:',epoch_loss)
        # A sample is correct when the highest-scoring class matches the label.
        correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        print('Accuracy:',accuracy.eval({x: images, y: labels}))
print('Training Neural Network...')
train_neural_network(x)
What am I doing wrong? What is needed to be fixed and how do I fix the shape of numpy array?
If you look closely, you'll see that you have two x placeholders:
x = tf.placeholder('float', [None, 784]) # global
...
x = tf.reshape(x, shape=[-1,28,28,1]) # in neural_network_model
One of them is in the function scope, hence not visible in train_neural_network, so tensorflow takes the one with [?, 784] shape. You should get rid of one of them.
Also note that your training data has the rank 3, i.e. [batch_size, 28, 28], so it's not directly compatible with any of those placeholders.
To feed it into the first x, take epoch_x.reshape([-1, 784]). For the second placeholder (once you make it visible), take epoch_x.reshape([-1, 28, 28, 1]).

tf.layers.batch_normalization freezes during sess.run() (1.5.0-dev20171031)

The graph building phase passes without error, but the program freezes (no reading hard drive, no memory change, no ...) during sess.run() in the first mini-batch in the first epoch. If I remove this layer or replace it with tf.contrib.layers.layer_norm, the program runs without issues.
The tensor (x) I pass into tf.layers.batch_normalization has the shape [#batches, 200]. I use most default values, but turned off the center and scale.
x_BN = tf.layers.batch_normalization(
x,
axis=-1,
momentum=0.99,
epsilon=1e-10, #0.001,
center=False, #True,
scale=False, #True,
beta_initializer=tf.zeros_initializer(),
gamma_initializer=tf.ones_initializer(),
moving_mean_initializer=tf.zeros_initializer(),
moving_variance_initializer=tf.ones_initializer(),
beta_regularizer=None,
gamma_regularizer=None,
beta_constraint=None,
gamma_constraint=None,
training=Flg_training, #False,
trainable=True,
name=None,
reuse=None,
renorm=False,
renorm_clipping=None,
renorm_momentum=0.99,
fused=False,
virtual_batch_size=None,
adjustment=None
)
The tensorflow version I'm using is tf-nightly-gpu (1.5.0-dev20171031 or 1.5.0-dev20171023). Has anyone encountered a similar problem?
Update
This happens when the input of tf.layers.batch_normalization is from tf.nn.bidirectional_dynamic_rnn, please see a simplified code to reproduce this issue:
import tensorflow as tf
import numpy as np
starter_learning_rate = 0.001
decay_steps = 100
decay_rate = 0.96
num_RNN_layers = 3
LSTM_CELL_SIZE = 100
keep_prob = 0.95
with tf.name_scope('Inputs'):
x = tf.placeholder(dtype=tf.float32, shape=[None, 200])
y = tf.placeholder(dtype=tf.float32, shape=[None, 200])
length = tf.placeholder(dtype=tf.int32, shape=[None])
Flg_training = tf.placeholder(dtype=tf.bool, shape=[])
x_1 = tf.expand_dims(x, -1)
with tf.name_scope('BiLSTM'):
dropcells = []
for iiLyr in list(range(num_RNN_layers)):
cell_iiLyr = tf.nn.rnn_cell.LSTMCell(num_units=LSTM_CELL_SIZE, state_is_tuple=True)
dropcells.append(tf.nn.rnn_cell.DropoutWrapper(cell=cell_iiLyr, output_keep_prob=keep_prob)) #,, input_keep_prob=self.keep_prob input_keep_prob=1.0, seed=None
MultiLyr_cell = tf.nn.rnn_cell.MultiRNNCell(cells=dropcells, state_is_tuple=True)
outputs, states = tf.nn.bidirectional_dynamic_rnn(
cell_fw=MultiLyr_cell,
cell_bw=MultiLyr_cell,
dtype=tf.float32,
sequence_length=length, #tf_b_lens
inputs=x_1, #stacked_RefPts_desc, #tf_b_VCCs_AMs_BN1
scope = "BiLSTM"
)
#output_fw, output_bw = outputs
states_fw, states_bw = states
c_fw_lstLyr, h_fw_lstLyr = states_fw[-1]
c_bw_lstLyr, h_bw_lstLyr = states_bw[-1]
states_concat1 = tf.concat([h_fw_lstLyr, h_bw_lstLyr], axis = 1, name = 'states_concat')
with tf.name_scope("cs_BN1"):
x_BN = tf.layers.batch_normalization(
states_concat1,
axis=-1, # axis that should be normalized (typically the features axis, in this case the concated states or hidden vectors)
momentum=0.99,
epsilon=1e-10, #0.001,
center=False, #True,
scale=False, #True,
beta_initializer=tf.zeros_initializer(),
gamma_initializer=tf.ones_initializer(),
moving_mean_initializer=tf.zeros_initializer(),
moving_variance_initializer=tf.ones_initializer(),
beta_regularizer=None,
gamma_regularizer=None,
beta_constraint=None,
gamma_constraint=None,
training=Flg_training, #False,
trainable=True,
name="test_BN", #None,
reuse=None,
renorm=False,
renorm_clipping=None,
renorm_momentum=0.99,
fused=False,
virtual_batch_size=None,
adjustment=None
)
with tf.name_scope("Regression"):
a = tf.get_variable("a", shape=[1], dtype=tf.float32, initializer=tf.constant_initializer(1.0))
b = tf.get_variable("b", shape=[1], dtype=tf.float32, initializer=tf.constant_initializer(0.0))
with tf.name_scope("Prediction"):
y_pred = tf.multiply(x_BN, a) + b
with tf.name_scope('Loss'):
losses = tf.losses.mean_squared_error(y, y_pred, reduction=tf.losses.Reduction.NONE)
mean_loss = tf.reduce_mean(losses)
with tf.name_scope('Training'):
global_step = tf.Variable(0, trainable=False)
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
decay_steps, decay_rate, staircase=True)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
train_step = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(losses, global_step=global_step)
#x_mean = tf.reduce_mean(x_BN, axis=0)
sess = tf.InteractiveSession()
train_writer = tf.summary.FileWriter("G:\\Surface_Ozone\\Temp\\", sess.graph)
sess.run(tf.global_variables_initializer())
for ii in list(range(2000)):
x_in = (np.random.rand(20, 200))
y_in = x_in * 1.5 + 3.0
length_in = np.full([20], 200, dtype=np.int32)
_, mean_loss_val, a_val, b_val = sess.run([train_step, mean_loss, a, b], feed_dict={
x: x_in,
Flg_training: True,
y: y_in,
length: length_in
})
if (ii < 50):
print("step {}: {} | a: {} | b: {}".format(ii, mean_loss_val, a_val, b_val))
else:
if (ii % 100 == 0):
print("step {}: {} | a: {} | b: {}".format(ii, mean_loss_val, a_val, b_val))
print("Normal End.")

TensorFlow: why use fp result y rather than ExponentialMovingAverage fp result average_y as cross_entropy's parameter?

The code is as below using python 3,Anaconda Spyder3.6,Tensorflow 1.0.0
"""
Created on Sat Oct 14 11:00:54 2017
#author: Han.H
"""
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
INPUT_NODE = 784
OUTPUT_NODE = 10
LAYER1_NODE = 500
BATCH_SIZE = 100
LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99
REGULARAZTION_RATE = 0.0001 #lambda
TRAINING_STEPS = 20000
MOVING_AVERAGE_DECAY = 0.99
# Forward pass: a plain one when no ExponentialMovingAverage is supplied,
# otherwise one that uses the moving averages of the parameters.
def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    """Compute the output of the two-layer network for ``input_tensor``.

    Args:
        input_tensor: input to the first layer.
        avg_class: ``None`` to use the variables directly, or an object whose
            ``average(var)`` method returns the averaged value of a variable.
        weights1, biases1: parameters of the hidden ReLU layer.
        weights2, biases2: parameters of the linear output layer.

    Returns:
        The output-layer pre-activations (no softmax is applied here).
    """
    # Identity comparison is the correct test for None.
    if avg_class is None:
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
        return tf.matmul(layer1, weights2) + biases2
    else:
        layer1 = tf.nn.relu(tf.matmul(input_tensor, avg_class.average(weights1)) + avg_class.average(biases1))
        return tf.matmul(layer1, avg_class.average(weights2)) + avg_class.average(biases2)
# Build and train a 3-layer fully connected network.
def train(mnist):
    """Train the network on ``mnist`` and report its accuracy.

    The loss is the cross-entropy of the plain forward pass plus L2
    regularisation of both weight matrices. Each training step also updates
    the moving averages of the trainable variables. Accuracy is measured with
    the moving-average forward pass: on the validation set every 1000 steps
    and on the test set after training.
    """
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')

    # Network parameters.
    weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))

    # Plain forward pass, used for the training loss.
    y = inference(x, None, weights1, biases1, weights2, biases2)

    # Moving averages of the trainable variables and the averaged forward pass.
    global_step = tf.Variable(0, trainable=False)
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)

    # Cross-entropy on the plain output plus L2 regularisation of the weights.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)
    regularaztion = regularizer(weights1) + regularizer(weights2)
    loss = cross_entropy_mean + regularaztion

    # Staircase exponential decay of the learning rate.
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True)

    # One train_op runs both the gradient step and the moving-average update.
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')

    # Accuracy of the averaged model.
    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
        test_feed = {x: mnist.test.images, y_: mnist.test.labels}
        for i in range(TRAINING_STEPS):
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("After %d training step(s), validation accuracy using average model is %g " % (i, validate_acc))
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: xs, y_: ys})
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print("After %d training step(s), test accuracy using average model is %g" % (TRAINING_STEPS, test_acc))
def main(argv=None):
    """Load the one-hot encoded MNIST data set and train the network on it."""
    train(input_data.read_data_sets("F:/python/MNIST_data/", one_hot=True))


if __name__ == '__main__':
    main()
This code has no problems and runs well. I just want to know why average_y can't be used as the logits when calculating the cross entropy. I tried doing so, and the results were terrible: the accuracy stayed at the level of the randomly initialized model, 0.009.