Parameter in TensorFlow Polynomial Regression Nan - tensorflow

I am running the following polynomial regression model:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import data_reader
# Polynomial regression fitted with per-sample gradient descent (TF 1.x graph mode).
learning_rate = 0.01
training_epochs = 40

# `freq` is used as a mapping: its keys become the inputs, its values the targets.
freq = data_reader.read('311.csv', 0, '%Y-%m-%d', 2016)
trX = np.array(list(freq.keys())).astype(float)
trY = np.array(list(freq.values())).astype(float)
num_coeffs = 6

plt.scatter(trX, trY)
plt.show()

X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)


def model(X, w):
    """Build the polynomial w[0] + w[1]*X + ... + w[num_coeffs-1]*X**(num_coeffs-1).

    Args:
        X: input tensor (placeholder) the polynomial is evaluated at.
        w: indexable tensor holding at least ``num_coeffs`` coefficients.

    Returns:
        A tensor with the sum of all polynomial terms.
    """
    terms = []
    for i in range(num_coeffs):
        term = tf.multiply(w[i], tf.pow(X, i))
        terms.append(term)
    return tf.add_n(terms)


w = tf.Variable([0.] * num_coeffs, name="parameters")
y_model = model(X, w)
cost = (tf.pow(Y-y_model, 2))
# NOTE(review): trX is fed unscaled, so tf.pow(X, i) up to i = 5 can become
# very large and the updates diverge to nan; the follow-up in this thread
# reports that rescaling X to the 0-1 range fixes it.
train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# A single session, created after the whole graph (including the optimizer)
# has been built, so one initializer run covers every variable.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(training_epochs):
        # One gradient step per (x, y) sample.
        for (x, y) in zip(trX, trY):
            sess.run(train_op, feed_dict={X: x, Y: y})
    w_val = sess.run(w)
print(w_val)
Here trX and trY are arrays of 52 numbers each. Unfortunately, the parameters w_val all come out as [nan nan nan nan nan nan]. What am I doing wrong?
thanks.

I solved it by normalizing the X-axis to the range 0-1. But do I need to normalize it?

Related

Recurrent neural network, time series prediction with newer Tensorflow 1.14

How to use new tf.keras API with recurrent neural network? I have checked the documentation but there is no example of such a situation.
There is this great book Hands on machine learning from 2017. Since that year the API of tensorflow has evolved and I am trying to rewrite recurrent neural network for time series prediction with using version 1.14 code.
The code from the book is using older tf.nn.dynamic_rnn and tf.nn.rnn_cell.BasicRNNCell:
# Sequence-to-sequence RNN built with the older tf.nn API (TF 1.x graph mode).
n_steps = 20
n_inputs = 1
n_neurons = 100
n_outputs = 1
learning_rate = 0.001

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])

cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)
rnn_outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)

# Flatten to rows of n_neurons so one dense layer projects every row to
# n_outputs, then restore the [batch, n_steps, n_outputs] layout.
stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])
stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)
outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])

loss = tf.reduce_mean(tf.square(outputs - y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_iterations = 500
batch_size = 50

# next_batch, time_series and t_instance are defined elsewhere (see the full
# listing later in this post).
with tf.Session() as sess:
    init.run()
    for iteration in range(n_iterations):
        X_batch, y_batch = next_batch(batch_size, n_steps)
        sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # Report the loss on the current batch every 100 iterations.
        if iteration % 100 == 0:
            mse = loss.eval(feed_dict={X: X_batch, y: y_batch})
            print(iteration, "\tMSE:", mse)

    # Predict while the trained variables are still alive in this session.
    X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))
    y_pred = sess.run(outputs, feed_dict={X: X_new})
And this code works just fine (except that it throws deprecation warnings left and right). I wanted to use the tf.keras API, as suggested in the warnings. My code is the same except for:
# Keras replacement for BasicRNNCell + dynamic_rnn.
# NOTE(review): return_sequences is not passed here; the Keras RNN layer
# defaults to return_sequences=False, i.e. it returns only the last time
# step's output rather than one output per step.
cell = tf.keras.layers.SimpleRNNCell(units=n_neurons, activation=tf.nn.relu)
rnn_outputs = tf.keras.layers.RNN(cell,dtype=tf.float32, name="hidden1")(X)
But this yields following exception:
InvalidArgumentError: Input to reshape is a tensor with 50 values, but the requested shape requires a multiple of 20
[[node Reshape_1 (defined at <ipython-input-9-879361be49dd>:3) ]]
so I understand that the problematic line is
outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])
After checking and comparing documentation for both cells https://www.tensorflow.org/api_docs/python/tf/nn/dynamic_rnn and
https://www.tensorflow.org/api_docs/python/tf/keras/layers/RNN I can't find the culprit.
What is the difference between these two cells? How do I use the tf.keras API with time series?
Full old code: https://github.com/ageron/handson-ml/blob/master/14_recurrent_neural_networks.ipynb
Full "my" code:
import numpy as np
import tensorflow as tf
from datetime import datetime
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import pandas as pd
from utils import shuffle_batch, variable_summaries
import os
dir_path = os.getcwd()
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)
print(dir_path)

# Time axis of the generated series and the sampling step between points.
t_min, t_max = -5, 5
section_start = (t_max + t_min) / 2
resolution = 0.1
n_steps = 20


def time_series(t):
    """Return the generated signal, sin(t), for a scalar or array ``t``."""
    return np.sin(t)


def next_batch(batch_size, n_steps):
    """Sample a random batch of input windows and their one-step-ahead targets.

    Each window starts at a time drawn uniformly from
    ``[t_min, t_max - n_steps * resolution)``, so the whole window (including
    the extra target point) stays inside ``[t_min, t_max]``.

    Args:
        batch_size: number of windows to draw.
        n_steps: number of time steps per window.

    Returns:
        A pair ``(inputs, targets)`` of arrays shaped
        ``(batch_size, n_steps, 1)``; ``targets`` is ``inputs`` shifted one
        step forward in time.
    """
    # Offset by t_min: without it the windows are drawn from
    # [0, t_max - t_min] instead of the intended [t_min, t_max] range.
    t0 = t_min + np.random.rand(batch_size, 1) * (t_max - t_min - n_steps * resolution)
    # n_steps + 1 points per window: the first n_steps are inputs, the last
    # n_steps are targets.
    Ts = t0 + np.arange(0., n_steps + 1) * resolution
    ys = time_series(Ts)
    return ys[:, :-1].reshape(-1, n_steps, 1), ys[:, 1:].reshape(-1, n_steps, 1)
# Full time axis for the overview plot.
t = np.linspace(t_min, t_max, int((t_max - t_min) / resolution))
# One example window of n_steps + 1 points starting at section_start; the
# first n_steps points are plotted as the instance, the last n_steps as the target.
t_instance = np.linspace(start = section_start, stop = section_start + resolution * (n_steps + 1),num = n_steps + 1)
plt.figure(figsize=(11,4))
# Left panel: the whole series with the example window highlighted.
plt.subplot(121)
plt.title("A time series (generated)", fontsize=14)
plt.plot(t, time_series(t), label=r"original")
plt.plot(t_instance[:-1], time_series(t_instance[:-1]), "b-", linewidth=3, label="A training instance")
plt.legend(loc="lower left", fontsize=14)
#plt.axis([-10, 10, -17, 13])
plt.xlabel("Time")
plt.ylabel("Value")
# Right panel: the example window (inputs) against its one-step-shifted targets.
plt.subplot(122)
plt.title("A training instance", fontsize=14)
plt.plot(t_instance[:-1], time_series(t_instance[:-1]), "bo", markersize=10, label="instance")
plt.plot(t_instance[1:], time_series(t_instance[1:]), "c*", markersize=10, label="target")
plt.legend(loc="upper left")
plt.xlabel("Time")
# In[6]:
# Network dimensions: windows of n_steps scalar inputs, one scalar output per step.
n_steps = 20
n_inputs = 1
n_neurons = 100
n_outputs = 1
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])
# In[7]:
cell = tf.keras.layers.SimpleRNNCell(units=n_neurons, activation=tf.nn.relu)
# NOTE(review): return_sequences is not set, and the Keras RNN layer defaults
# to return_sequences=False (last step only). The reshape to
# [-1, n_steps, n_outputs] below needs one output per time step, which is
# consistent with the "requires a multiple of 20" error reported in this post.
rnn_outputs = tf.keras.layers.RNN(cell,dtype=tf.float32, name="hidden1")(X)
print(rnn_outputs.get_shape())
# Flatten to rows of n_neurons, project each row to n_outputs, then reshape
# back to [batch, n_steps, n_outputs].
stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons], name='reshape1')
stacked_outputs = tf.keras.layers.Dense(n_outputs,name="hidden2")(stacked_rnn_outputs)
outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs], name='reshape2')
learning_rate = 0.001
loss = tf.reduce_mean(tf.square(outputs - y)) # MSE
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_iterations = 1500
batch_size = 50
# Checkpoint prefix under <cwd>/model/.
save_path =os.path.join(dir_path,"model","recurrent_sinus_model")
# Train on random batches, logging the batch MSE every 100 iterations, then
# write a checkpoint to save_path.
with tf.Session() as sess:
    init.run()
    for iteration in range(n_iterations):
        X_batch, y_batch = next_batch(batch_size, n_steps)
        sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        if iteration % 100 == 0:
            mse = loss.eval(feed_dict={X: X_batch, y: y_batch})
            print(iteration, "\tMSE:", mse)
    saver.save(sess, save_path)

# Restore the checkpoint in a fresh session and predict the example window.
with tf.Session() as sess:
    saver.restore(sess, save_path)
    X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))
    y_pred = sess.run(outputs, feed_dict={X: X_new})

plt.title("Testing the model", fontsize=14)
plt.plot(t_instance[:-1], time_series(t_instance[:-1]), "bo", markersize=10, label="instance")
plt.plot(t_instance[1:], time_series(t_instance[1:]), "w*", markersize=10, label="target")
plt.plot(t_instance[1:], y_pred[0,:,0], "r.", markersize=10, label="prediction")
plt.legend(loc="upper left")
plt.xlabel("Time")
plt.show()

# In[ ]:
# Run the restored model over the whole time axis, split into windows of
# n_steps points.
with tf.Session() as sess:
    saver.restore(sess, save_path)
    X_new = time_series(np.array(t.reshape(-1, n_steps, n_inputs)))
    y_pred = sess.run(outputs, feed_dict={X: X_new})

# NOTE(review): y_pred from the block above is not plotted here; only the
# original series is drawn (twice).
plt.title("A time series (generated)", fontsize=14)
plt.plot(t, time_series(t), label=r"original",linewidth=5,c='r')
plt.plot(t[:-1], time_series(t[:-1]), "b-", linewidth=3, label="A training instance")
plt.legend(loc="lower left", fontsize=14)
plt.xlabel("Time")
plt.ylabel("Value")
So the answer is:
rnn_outputs, rnn_states = tf.keras.layers.RNN(cell,dtype=tf.float32, name="hidden1", return_state=True, return_sequences=True)(X)
instead of
rnn_outputs = tf.keras.layers.RNN(cell,dtype=tf.float32, name="hidden1")(X)
So the parameter return_sequences=True makes the RNN return its output for every time step instead of only the last one, which is exactly what is needed here.

"keras.backend.variable" is not behaving correctly in keras as opposed to tensorflow

I want to define a trainable scalar in my models. In TensorFlow, this is done using tf.Variable. In Keras, keras.backend.variable is supposed to behave the same way. However, when I use model.fit, Keras does not change the variable during the optimization process. Does anyone know why?
To test, please uncomment either RUN_ON = "tensorflow" or RUN_ON = "keras" to run on the corresponding engine.
import numpy as np
import keras as k
import tensorflow as tf
import matplotlib.pyplot as plt
# Uncomment exactly one of the two lines below to pick the engine.
# RUN_ON = "tensorflow"
# RUN_ON = "keras"

# Ground truth: y = w_true * x + b_true sampled on 1000 points in [0, 1].
b_true = 3.0
w_true = 5.0
x_true = np.linspace(0.0, 1.0, 1000).reshape(-1, 1)
y_true = x_true * w_true + b_true
ids = np.arange(0, x_true.shape[0])

if RUN_ON=="keras":
    x = k.Input((1,), dtype="float32", name="x")
    Fx = k.layers.Dense(1, use_bias=False, name="Fx")(x)
    # NOTE(review): `b` is a backend variable that is only captured by the
    # Lambda's closure; presumably Keras does not register it as a trainable
    # weight of the model, which would explain why fit() never updates it --
    # confirm against the Keras Lambda layer documentation.
    b = k.backend.variable(1.0, name="b")
    y = k.layers.Lambda(lambda x: x+b, name="Add")(Fx)
    model = k.Model(inputs=[x], outputs=[y])
    model.compile("adam", loss="mse")
    # model.summary()
    model.fit(x_true, [y_true], epochs=100000, batch_size=1000)
    y_pred = model.predict(x_true)
elif RUN_ON=="tensorflow":
    x = tf.placeholder("float32", shape=[None, 1], name="x")
    Fx = tf.layers.Dense(1, use_bias=False, name="Fx")(x)
    b = tf.Variable(1.0, name="b")
    y = Fx + b
    yp = tf.placeholder("float32", shape=[None, 1], name="y")
    loss = tf.reduce_mean(tf.square(yp - y))
    opt = tf.train.AdamOptimizer(0.001).minimize(loss)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(100000):
            # Full-batch step on a freshly shuffled ordering of the samples.
            np.random.shuffle(ids)
            opt_out, loss_val, b_val = sess.run([opt, loss, b], feed_dict={x: x_true[ids], yp: y_true[ids]})
            print("epoch={:d} loss={:e} b_val={:f}".format(i, loss_val, b_val))
            # Stop early once the fit is essentially exact.
            if loss_val < 1.0e-9:
                break
        y_pred = sess.run([y], feed_dict={x: x_true, yp: y_true})[0]
else:
    raise ValueError('`RUN_ON` should be either `keras` or `tensorflow`.')

# Dashed blue: ground truth; red: model prediction.
plt.plot(x_true, y_true, '--b', linewidth=4)
plt.plot(x_true, y_pred, 'r')
plt.show()
#

Tensorflow session returns nan

import numpy as np
import tensorflow as tf
# Fit y = 4x + 6 with one gradient-descent step per sample (TF 1.x graph mode).
x_input = np.linspace(0, 20, 100)
y_input = 4*x_input + 6

W = tf.Variable(0.0, name="weight")
b = tf.Variable(0.0, name="bias")
X = tf.placeholder(tf.float32, name='InputX')
Y = tf.placeholder(tf.float32, name='InputY')

Y_pred = X*W + b
loss = tf.reduce_mean(tf.square(Y_pred - Y))
# NOTE: with learning_rate=0.01 this run ends with nan parameters; the reply
# in this thread reports that 0.001 gives proper values.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(50):
        for x, y in zip(x_input, y_input):
            print (x, y)
            sess.run([optimizer, loss], feed_dict={X: x, Y: y})
    # Fetch into new names so the `b` Variable is not overwritten by its value.
    w_val, b_val = sess.run([W, b])
    print("Model parameters: ", w_val, b_val)
# '''Above prints nan nan.'''
For a simple problem like this you need a lower learning_rate; with the higher learning rate, the repeated per-sample updates seem to make the model parameters diverge until they become NaN.
# setting learning_rate 0.001, gives proper value
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(loss)
...
print(w, b)
# results 4.05073 4.98799

TensorFlow Linear Regression gives 'NaN' result

I am currently running the TensorFlow model with Linear Regression. However, I don't understand why, even when I decrease the learning_rate from 0.01 to 0.001 and increase the training iterations from 1000 to 50000, I still obtain the 'nan' result for the cost function, as well as the two coefficients. Could anyone please help me detect the problem in the following code?
from __future__ import print_function
import tensorflow as tf
import numpy
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
import random
rng = numpy.random
# Parameters
learning_rate = 0.001
training_epochs = 20000 #number of iterations
display_step = 400
#read csv file
# Placeholder left by the author: set this to the directory that holds the CSV
# (including the trailing path separator, since it is concatenated below).
datapath = "[directory path]"
Ha_Noi = pd.read_csv(datapath+"HaNoi_1month_LW_WeatherTest.csv")
#Add an additional column into the table
sLength = len(Ha_Noi['accept_rate'])
Ha_Noi['accept_rate_timeT'] = pd.Series(Ha_Noi['accept_rate'], index=Ha_Noi.index)
#Shift the entries in the accept_rate column upward
Ha_Noi.accept_rate = Ha_Noi.accept_rate.shift(-1)
# Drop rows with missing values in the columns used below; the shift above
# leaves a NaN in the last accept_rate row.
Ha_Noi = Ha_Noi.dropna(subset = ["longwait_percent4"])
Ha_Noi = Ha_Noi.dropna(subset=["accept_rate"])
Ha_Noi = Ha_Noi.dropna(subset = ["longwait_percent2"])
df2 = pd.DataFrame(Ha_Noi)
#split the dataset into training and testing sets
train_set, test_set = train_test_split(Ha_Noi, test_size=0.2, random_state = random.randint(20, 200))
# Go through .values: Series.reshape is not available in current pandas, the
# underlying NumPy array has to be reshaped instead.
Xtrain = train_set['longwait_percent2'].values.reshape(-1,1)
Ytrain = train_set['accept_rate'].values.reshape(-1,1)
Xtrain2 = train_set['Weather Weight_Longwait_percent2'].values.reshape(-1,1)
Xtest2 = test_set['Weather Weight_Longwait_percent2'].values.reshape(-1,1)
# Xtest = test_set['longwait_percent2'].reshape(-1,1)
# Ytest = test_set['accept_rate'].reshape(-1,1)
# Training Data
train_X = Xtrain
train_Y = Ytrain
n_samples = train_X.shape[0]
#Testing Data
# The module is imported as `numpy` in this script (there is no `np` alias).
Xtest = numpy.asarray(test_set['longwait_percent2'])
Ytest = numpy.asarray(test_set['accept_rate'])
# tf Graph Input
X = tf.placeholder("float")
Y = tf.placeholder("float")
# Set model weights
W = tf.Variable(rng.randn(), name="weight")
b = tf.Variable(rng.randn(), name="bias")
# Construct a linear model
pred = tf.add(tf.multiply(X, W), b)
# Root mean squared error (note the tf.sqrt around the mean of squares)
# NOTE(review): the gradient of sqrt is unbounded at 0, so a sample that is
# fitted exactly may yield nan updates -- worth checking as a source of the
# reported NaN.
cost = tf.sqrt(tf.reduce_sum(tf.pow(pred-Y, 2))/(n_samples))
# Gradient descent method
# Note, minimize() knows to modify W and b because Variable objects are "trained" (trainable=True by default)
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# Initializing the variables
init = tf.global_variables_initializer()
saver = tf.train.Saver() #save all the initialized data
# Launch the graph
# Launch the graph: train, report progress, then evaluate on the test split.
with tf.Session() as sess:
    sess.run(init)
    # Fit all training data, one gradient step per sample
    for epoch in range(training_epochs):
        for (x, y) in zip(train_X, train_Y):
            sess.run(optimizer, feed_dict={X: x, Y: y})
        # Display logs every `display_step` epochs
        if (epoch+1) % display_step == 0:
            c = sess.run(cost, feed_dict={X: train_X, Y:train_Y})
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(c), \
                  "W=", sess.run(W), "b=", sess.run(b))
    print("Optimization Finished!")
    training_cost = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
    print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n')
    # Graphic display
    plt.plot(train_X, train_Y, 'ro', label='Original data')
    plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line')
    plt.legend()
    plt.show()
    # Mean squared error on the test split (no square root yet).
    testing_cost = sess.run(
        tf.reduce_sum(tf.pow(pred - Y, 2)) / (Xtest.shape[0]),
        feed_dict={X: Xtest, Y: Ytest})
    # Take the root with NumPy: tf.sqrt would only build a new tensor and
    # print the tensor object instead of a number.
    testing_rmse = numpy.sqrt(testing_cost)
    print("Root Mean Square Error =", testing_rmse)
    # `cost` (and therefore training_cost) is already a root mean squared
    # error, so compare it with the test RMSE rather than the test MSE.
    print("Absolute mean square loss difference:", abs(
        training_cost - testing_rmse))
    plt.plot(Xtest, Ytest, 'bo', label='Testing data')
    plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line')
    plt.legend()
    plt.show()
I don't have your data, so it's hard to tell whether the problem is caused by the data or by the training. You can make the learning rate and the number of training iterations much smaller, such as 0.00005 and 100, to see whether you still get NaN.

tensorflow error occur on tf.matmul

I get an error on line 13, y = tf.matmul(W, x_data) + b, in the code below,
and I can't understand the reason:
import tensorflow as tf
import numpy as np
x_data = np.float32(np.random.rand(2, 100))
y_data = np.dot([0.100, 0.200], x_data) + 0.300
b = tf.Variable(tf.zeros([1]))
W = tf.Variable(tf.random_uniform([1, 2], -1.0, 1.0))
y = tf.matmul(W, x_data) + b
loss = tf.reduce_mean(tf.square(y - y_data))
optimizer = tf.train.GradientDescentOptimizer(0.5)
train = optimizer.minimize(loss)
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)
#sess.run(x_data)
for step in xrange(0, 201):
sess.run(train)
if step % 20 == 0:
print step, sess.run(W), sess.run(b)
#print "xdata=", x_data
#print "ydata=", y_data