How to predict a very large number M of xy coordinates? - tensorflow

I have dataset of 6000 samples.
Each sample has 121 points of x and y coordinates.
The dataset is like this (6000, 121, 2).
I need to predict 1464 points of xy coordinates. So what I need is to map the 121,2 input points into 1464,2 out points?
I have tried many approaches pure ANN,CNN, endcoder decoder.
However the results weren't satisfying.
Can you one has an idea how to solve such problem?
I have tried several approaches as ANN and CNN. However, there was somehow big difference between the predicted outputs and the labels.

one way possible is simply session.run(), optimizer this example is for experiment and understanding of inputs shape where you can use layers and model too but feeds shape must be correct for training and predictions.
Sample: X and Y Co-ordinate learning, monitors the loss value, and prevent errors from unsatisfied conditions.
import os
from os.path import exists
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
learning_rate = 0.001
global_step = 0
tf.compat.v1.disable_eager_execution()
X = np.arange(2, 6002, 1)
Y = np.arange(12, 3012, 0.5)
X = np.reshape( X, (1, 6000, 1))
Y = np.reshape( Y, (1, 6000, 1))
history = [ ]
history_Y = [ ]
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Functions
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate)
optimizer = tf.compat.v1.train.ProximalAdagradOptimizer(
learning_rate,
initial_accumulator_value=0.1,
l1_regularization_strength=0.2,
l2_regularization_strength=0.1,
use_locking=False,
name='ProximalAdagrad'
)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Optimizer
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
var1 = tf.Variable(10.0)
var2 = tf.Variable(10.0)
X_var = tf.compat.v1.get_variable('X', dtype = tf.float32, initializer = tf.random.normal((1, 6000, 1)))
y_var = tf.compat.v1.get_variable('Y', dtype = tf.float32, initializer = tf.random.normal((1, 6000, 1)))
Z = tf.nn.l2_loss((var1 - X_var) ** 2 + (var2 - y_var) ** 2, name="loss")
cosine_loss = tf.keras.losses.CosineSimilarity(axis=1)
loss = tf.reduce_mean(input_tensor=tf.square(Z))
training_op = optimizer.minimize(cosine_loss(X_var, y_var))
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Training
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
with tf.compat.v1.Session() as sess:
sess.run(tf.compat.v1.global_variables_initializer())
# number of data feeds ( 6000 ==> 1000 )
for i in range(1000):
global_step = global_step + 1
train_loss, temp = sess.run([loss, training_op], feed_dict={X_var:X, y_var:Y})
history.append(train_loss)
history_Y.append( history[0] - train_loss )
x_value = X_var.eval()[0][i][0]
y_value = y_var.eval()[0][i][0]
v = Z.eval()
print( 'x_value ' + str( x_value ) )
print( 'y_value ' + str( y_value ) )
print( 'v ' + str( v ) )
print( 'steps: ' + str(i) )
X2 = np.reshape([ 500, -400, 400, -300, 300, -200, 200, -100, 100, 1 ], (1, 10, 1))
Y2 = np.reshape([ -400, 400, -300, 300, -200, 200, -100, 100, -50, 50 ], (1, 10, 1))
X_var = X_var[:,:10:]
y_var = y_var[:,:10:]
for i in range(10):
train_loss, temp = sess.run([loss, training_op], feed_dict={X_var:X2, y_var:Y2})
x_value = X_var.eval()[0][i][0]
y_value = y_var.eval()[0][i][0]
v = Z.eval()
print( 'x_value ' + str( x_value ) )
print( 'y_value ' + str( y_value ) )
print( 'v ' + str( v ) )
print( 'steps: ' + str(i) )
sess.close()
plt.plot(history)
plt.plot(history_Y)
plt.show()
plt.close()
input('...')
Output: Target X and Y coordinate prediction, training tries to evaluate the var1, var2, loss values, and square errors.
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Training
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
x_value 0.36800474
y_value -0.53657633
v 121269736.0
steps: 883
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Predict
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
x_value -0.25996116
y_value 0.006405342
v 120543970.0
steps: 7

Related

Can I rename a TensorFlow operation?

I ran the TensorFlow profiler using Python and got several types and operations.
Is there a way to change the name of the type and make it output with the changed name when profiling is performed? For example, I would like to know if there is a way to rename a type named Conv2D to conv2D_LOVE so that it is output to the profiler.
I'm currently searching, but I can't find the right way.
For example, I use the Alexnet model, and after profiling I got this output:
There are many of the class inherits that are allowed for the customizations. One is the Dense and LSTM layer. They are suitable to perform tasks internally and return you specific data as blocks of code.
Sample:
import tensorflow as tf
import matplotlib.pyplot as plt
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Class / Functions
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
class My_3D_noises_generator(tf.keras.layers.Layer):
def __init__(self, num_outputs):
super(My_3D_noises_generator, self).__init__()
self.num_outputs = num_outputs
def build(self, input_shape):
self.kernel = self.add_weight("kernel",
shape=[int(input_shape[-1]),
self.num_outputs],
initializer=tf.ones_initializer())
def call(self, inputs):
pi = 3.141592653589793
start = 0.0
stop = 1.0 * 2.0 * pi
x = tf.linspace( start, stop, self.num_outputs, name='linspace', axis=0 )
y1 = 3 * tf.math.sin( x )
escape_sine = tf.random.normal(
shape=( self.num_outputs, ),
mean=0.0,
stddev=0.15 * tf.math.abs( y1, name='abs' ),
dtype=tf.dtypes.float32,
seed=32,
name=None
)
y1 = tf.concat( (tf.zeros(60), y1 + escape_sine, tf.zeros(60)), axis=0, name='concat' )
initial_degree = tf.experimental.numpy.arange( -3, 0, 3 / 60, dtype=tf.float32 )
midring_degree = tf.experimental.numpy.arange( 0, 3 * 2 * pi, ( 3 * 2 * pi) / self.num_outputs, dtype=tf.float32 )
skipped_degree = tf.experimental.numpy.arange( 3 * 2 * pi, 3 * 2 * pi + 3, ( 3 * 2 * pi - 3 * 2 * pi + 3 ) / 60, dtype=tf.float32 )
x = tf.concat(( initial_degree.numpy(), midring_degree.numpy(), skipped_degree.numpy()), axis=0, name='concat')
y2 = 0.1 * x + 1
y = y1 + y2
z = 15 * tf.random.normal(
shape=( 1, self.num_outputs, ),
mean=0.0,
stddev=1,
dtype=tf.dtypes.float32,
seed=32,
name=None
)
x = tf.expand_dims(x, axis=0)
y = tf.expand_dims(y, axis=0)
z = tf.matmul(inputs, z)
x = tf.matmul(inputs, x)
y = tf.matmul(inputs, y)
x = x[int(tf.math.argmax(x, axis=0)[0])]
y = y[int(tf.math.argmax(y, axis=0)[0])]
z = z[int(tf.math.argmax(z, axis=0)[0])]
x = tf.expand_dims(x, axis=-1)
y = tf.expand_dims(y, axis=-1)
z = tf.expand_dims(z, axis=-1)
return x, y, z
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Perform operations
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
start = 3
limit = 33
delta = 3
sample = tf.range(start, limit, delta)
sample = tf.cast( sample, dtype=tf.float32 )
sample = tf.constant( sample, shape=( 10, 1 ) )
layer = My_3D_noises_generator(100)
xdata, ydata, zdata = layer(sample)
ax = plt.axes(projection='3d')
# Data for a three-dimensional line
zline = tf.range(0, 1000, 25)
zline = tf.cast( zline, dtype=tf.float32 )
xline = 20 * tf.math.sin(zline)
yline = 20 * tf.math.cos(zline)
ax.plot3D(xline, yline, zline, 'gray')
ax.scatter3D(xdata[0:100,:], ydata[0:100,:], zdata[0:100,:], c=zdata[0:100,:], cmap='Greens');
plt.show()
Output: Generated random noises in 3D try to catches them.

How to print out prediction value in tensorflow

I am new to tensorflow and I am a slow learner. After successfully compiling the model and get the accuracy I want to print the prediction variable but I dont know how to do it.
My dataset is multivariate feature with only one output. The output contains only 1, 0 ,-1 so I made one hot encoder for the output. I finished compiling the model and looking for computing prediction on tensorflow online, however I didnt find a good solution base on my question.
The precisionCalculate function is to compute precision on each column on test data since the trian_y and test_y after one hot encode becomes [1,0,0],[0,1,0],[0,0,1].
I have tried
y_pred = sess.run(tf.argmax(y, 1), feed_dict={X: test_x, y: test_y})
but it turns out y_pred is exactly the same as my test_y
Here is my full code example.
import tensorflow as tf
import pandas as pd
import numpy as np
import tensorflow.contrib.rnn
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, LabelEncoder
import pdb
np.set_printoptions(threshold=np.inf)
def precisionCalculate(pred_y, test_y):
count = pred_y + test_y
firstZero = len(count[count==0])
countFour = len(count[count == 4])
precision1 = firstZero / len(pred_y[pred_y==0] )
precision3 = countFour / len(pred_y[pred_y==2])
pdb.set_trace()
return precision1, precision3
df = pd.read_csv('new_df.csv', skiprows=[0], header=None)
df.drop(columns=[0,1], inplace=True)
df.columns = [np.arange(0, df.shape[1])]
df[0] = df[0].shift(-1)
#parameters
time_steps = 1
inputs = df.shape[1]
outputs = 3
#remove nan as a result of shift values
df = df.iloc[:-1, :]
#convert to numpy
df = df.values
train_number = 30276 #start date from 1018
train_x = df[: train_number, 1:]
test_x = df[train_number:, 1:]
train_y = df[:train_number, 0]
test_y = df[train_number:, 0]
#data pre-processing
#x y split
#scale
scaler = MinMaxScaler(feature_range=(0,1))
train_x = scaler.fit_transform(train_x)
test_x = scaler.fit_transform(test_x)
#reshape into 3d array
train_x = train_x[:, None, :]
test_x = test_x[:, None, :]
#one-hot encode the outputs
onehot_encoder = OneHotEncoder()
#encoder = LabelEncoder()
max_ = train_y.max()
max2 = test_y.max()
train_y = (train_y - max_) * (-1)
test_y = (test_y - max2) * (-1)
encode_categorical = train_y.reshape(len(train_y), 1)
encode_categorical2 = test_y.reshape(len(test_y), 1)
train_y = onehot_encoder.fit_transform(encode_categorical).toarray()
test_y = onehot_encoder.fit_transform(encode_categorical2).toarray()
print(train_x.shape, train_y.shape, test_x.shape, test_y.shape)
#model parameters
learning_rate = 0.001
epochs = 100
batch_size = int(train_x.shape[0]/10)
length = train_x.shape[0]
display = 100
neurons = 100
tf.reset_default_graph()
X = tf.placeholder(tf.float32, [None, time_steps, 90],name='x')
y = tf.placeholder(tf.float32, [None, outputs],name='y')
#LSTM cell
cell = tf.contrib.rnn.BasicLSTMCell(num_units = neurons, activation = tf.nn.relu)
cell_outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
# pass into Dense layer
stacked_outputs = tf.reshape(cell_outputs, [-1, neurons])
out = tf.layers.dense(inputs=stacked_outputs, units=outputs)
# squared error loss or cost function for linear regression
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=out, labels=y))
# optimizer to minimize cost
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
accuracy = tf.metrics.accuracy(labels = tf.argmax(y, 1), predictions = tf.argmax(out, 1), name = "accuracy")
precision = tf.metrics.precision(labels=tf.argmax(y, 1), predictions=tf.argmax(out, 1), name="precision")
recall = tf.metrics.recall(labels=tf.argmax(y, 1), predictions=tf.argmax(out, 1),name="recall")
f1 = 2 * accuracy[1] * recall[1] / ( precision[1] + recall[1] )
with tf.Session() as sess:
# initialize all variables
tf.global_variables_initializer().run()
tf.local_variables_initializer().run()
# Train the model
for steps in range(epochs):
mini_batch = zip(range(0, length, batch_size), range(batch_size, length+1, batch_size))
epoch_loss = 0
i = 0
# train data in mini-batches
for (start, end) in mini_batch:
sess.run(training_op, feed_dict = {X: train_x[start:end,:,:], y: train_y[start:end,:]})
# print training performance
if (steps+1) % display == 0:
# evaluate loss function on training set
loss_fn = loss.eval(feed_dict = {X: train_x, y: train_y})
print('Step: {} \tTraining loss: {}'.format((steps+1), loss_fn))
# evaluate model accuracy
acc, prec, recall, f1 = sess.run([accuracy, precision, recall, f1],feed_dict = {X: test_x, y: test_y})
y_pred = sess.run(tf.argmax(y, 1), feed_dict={X: train_x, y: train_y})
test_y_alter = np.argmax(test_y, axis=1)
#print(test_y_alter)
print(precisionCalculate(y_pred, test_y_alter))
print(y_pred)
#prediction = y_pred.eval(feed_dict={X: train_x, y: test_y})
#print(prediction)
print('\nEvaluation on test set')
print('Accuracy:', acc[1])
print('Precision:', prec[1])
print('Recall:', recall[1])
print('F1 score:', f1)
I think you should use the output of your model instead of the label (y) in tf.argmax.
Here is my code in order to print prediction of the model:
pred_y = tf.Print(tf.argmax(score, 1), [tf.argmax(score, 1)], message="prediction:)
pred_y.eval()
In the above code, score means the probability output of your model.

How to make lstm/rnn focus more on certain parts of time series while less on other parts using tensorflow?

I have a time series prediction problem where most of the observed values (95%) are 0s while remaining values are non-zeros. How can I make use of RNN for this problem.
I want to predict surface flow from environmental data(air temperature, rainfall, humidity etc). We know surface flow is 0.0 for most of the time in an year. However, I also don't want to simply ignore 0s as the 0s represent the period of the year when when surface flow is 0.0. The image below shows possible observed output and three inputs. The three inputs here are just random but in reality they will be data like rainfall, humidity etc and these input data have some periodic pattern.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psutil
import tensorflow as tf
import sys
print(sys.version)
print('tensorflow version: ', tf.__version__)
#clean computation graph
tf.reset_default_graph()
tf.set_random_seed(777) # reproducibility
np.random.seed(0)
def MinMaxScaler(data):
numerator = data - np.min(data, 0)
denominator = np.max(data, 0) - np.min(data, 0)
# noise term prevents the zero division
return numerator / (denominator + 1e-7)
class generate_data(object):
def __init__(self, data_len, in_series, y_pred, seq_lengths, method='sum' ):
self.data_len = data_len
self.data = None
self.in_series = in_series #number of input series
self.y_pred = y_pred #number of final outputs from model
self.seq_lengths = seq_lengths
self.method = method
def _f(self, x):
y = 0
result = []
for _ in x:
result.append(y)
y += np.random.normal(scale=1)
return np.array(result)
def _runningMean(self, x, N):
return np.convolve(x, np.ones((N,))/N)[(N-1):]
def sine(self):
DATA = np.zeros((self.data_len, self.in_series))
xx = [None]
data_0 = np.sin(np.linspace(0, 20, self.data_len*self.in_series))
xx = data_0.reshape(self.data_len, self.in_series)
DATA[:,0: self.in_series] = xx
y = self._get_y(DATA)
return xx,y, DATA
def _get_y(self, xx):
if self.method=='sum':
yy = np.array([np.sum(xx[i,:]) for i in range(np.shape(xx)[0])])
elif self.method == 'mean':
yy = np.array([np.mean(xx[i,:]) for i in range(np.shape(xx)[0])])
elif self.method == 'self_mul':
yy = np.array([np.prod(xx[i,:]) for i in range(np.shape(xx)[0])])
elif self.method == 'mean_mirror':
yy = np.array([np.mean(xx[i,:]) for i in range(np.shape(xx)[0])])
return yy
def normalize(self, xx1,yy1):
yy = [None]*len(yy1)
YMinMax = {}
xx = MinMaxScaler(xx1)
for i in range(self.y_pred):
YMinMax['ymin_' + str(i)] = np.min(yy1[0])
YMinMax['ymax_' + str(i)] = np.max(yy1[0])
yy[i] = MinMaxScaler(yy1[0])
setattr(self, 'YMinMax', YMinMax)
return xx,yy
def create_dataset(self, xx, yy, percent_of_zeros):
'''creates a dataset consisting of windows for x and y data'''
dataX = self._build_input_windows(xx, self.seq_lengths)
if self.y_pred > 1:
pass
elif self.y_pred > 1 and self.seq_lengths != any(self.seq_lengths):
pass
else:
dataY = self._build_y_windows(yy[0] , self.seq_lengths)
indices = np.random.choice(np.arange(dataY.size), replace=False,
size=int(dataY.size * percent_of_zeros))
dataY[indices] = 0
return dataX, dataY
def _build_input_windows(self, time_series, seq_length):
dataX = []
for i in range(0, len(time_series) - seq_length):
_x = time_series[i:i + seq_length, :]
dataX.append(_x)
return np.array(dataX)
def _build_y_windows(self, iny, seq_length):
dataY = []
for i in range(0, len(iny) - seq_length):
_y = iny[i + seq_length, ] # Next close price
dataY.append(_y)
return np.array(dataY)
def TrainTestSplit(self, dataX, dataY, train_frac):
train_size = int(len(dataY) * train_frac)
trainX, testX = np.array(dataX[0:train_size]), np.array(dataX[train_size:len(dataX)])
trainY, testY = np.array(dataY[0:train_size]), np.array(dataY[train_size:len(dataY)])
trainY = trainY.reshape(len(trainY), 1)
testY = testY.reshape(len(testY), 1)
return trainX, trainY, testX, testY, train_size
#training/hyper parameters
tot_epochs = 500
batch_size = 16
learning_rate = 0.01
seq_lengths = 5 #sequence lengths/window size for RNN
rnn_inputs = 3 # no of inputs for RNN
y_pred = 1
data_length = 1005 #this can be overwritten or useless
gen_data = generate_data(data_length, rnn_inputs, y_pred, seq_lengths, 'sum')
xx,yy,data_1 = gen_data.sine()
# xx = abs(xx)
train_frac = 0.8
xx1,yy1 = gen_data.normalize(xx,[yy])
zeros = 0.96
dataX, dataY = gen_data.create_dataset(xx1,yy1, zeros)
trainX, trainY, testX, testY, train_size = gen_data.TrainTestSplit( dataX, dataY, train_frac)
keep_prob = tf.placeholder(tf.float32)
x_placeholders = tf.placeholder(tf.float32, [None, 5, 3])
Y = tf.placeholder(tf.float32, [None, 1])
plt.plot(dataY, '.', label='output')
plt.plot(xx[:,0], '.', label='input1')
plt.plot(xx[:,1], '.', label='input2')
plt.plot(xx[:,2], '.', label='input3')
plt.legend()
# build neural network
with tf.variable_scope('scope0'): #defining RNN
# cell = tf.contrib.rnn.BasicLSTMCell(num_units= 7, state_is_tuple=True, activation=tf.tanh)
cell = tf.keras.layers.LSTMCell(units = 128)
outputs1, _states = tf.nn.dynamic_rnn(cell, x_placeholders, dtype=tf.float32)
# Y_pred1 = tf.contrib.layers.fully_connected(outputs1[:, -1], 1, activation_fn=None)
Y_pred1 = tf.keras.layers.Dense(1)(outputs1[:,-1])
Y_pred = Y_pred1
## cost/loss
loss = tf.reduce_sum(tf.square(Y_pred - Y)) # sum of the squares
## optimizer
optimizer = tf.train.AdamOptimizer(learning_rate)
train = optimizer.minimize(loss)
#
## RMSE
targets = tf.placeholder(tf.float32, [None, 1])
predictions = tf.placeholder(tf.float32, [None, 1])
rmse = tf.sqrt(tf.reduce_mean(tf.square(targets - predictions)))
with tf.Session() as sess:
saver = tf.train.Saver(max_to_keep=41)
writer = tf.summary.FileWriter('./laos_2out/cnntest', sess.graph)
init = tf.global_variables_initializer()
sess.run(init)
# Training step
for epoch in range(tot_epochs):
total_batches = int(train_size / batch_size) ##total batches/ no. of steps in an epoch
#for batch in range(total_batches):
_, step_loss = sess.run([train, loss], feed_dict= {x_placeholders:trainX, Y:trainY, keep_prob:0.5} )
print('epoch: # {} loss: {}'.format(epoch, step_loss))
# # evaluating on test data
test_predict = sess.run(Y_pred, feed_dict= {x_placeholders:testX, Y:trainY, keep_prob:0.5} )
#evaluating on training data
train_predict = sess.run(Y_pred, feed_dict={x_placeholders:trainX, Y:trainY, keep_prob:0.5})
rmse_val = sess.run(rmse, feed_dict={targets: testY, predictions: test_predict})
print("RMSE: {}".format(rmse_val))
# Plot predictions
fig, (ax1,ax2) = plt.subplots(1,2, sharey=True)
fig.set_figwidth(14)
fig.set_figheight(5)
ax2.plot(testY, 'b', label='observed')
ax2.plot(test_predict, 'k', label='predicted')
ax2.legend(loc="best")
ax2.set_xlabel("Time Period")
ax2.set_title('Testing')
ax1.plot(trainY, 'b', label='observed')
ax1.plot(train_predict, 'k',label= 'predicted')
ax1.legend(loc="best")
ax1.set_xlabel("Time Period")
ax1.set_ylabel("discharge (cms)")
ax1.set_title('Training')
plt.show()
The problem is that while training, the model focuses on majority of values i.e. 0s and thus makes the predictions equal to 0s. How can I make the model focus on non-zero values (positive surface flow) while at the same time also consider 0s (when there is no surface flow). I have read about attention mechanism but have not understood that how I can implement it in such scenarios.

Understanding model loss/accuracy and how not to leak information

This question is related to the starting one posted here.
The problem is to classify rows so that the classification of row number i can rely on the data for all the previous rows including class membership. The linked post contains an answer which is posted bellow.
For the sake of experimentation I've used a set of randomly crafted data, where the classifying property is a 0,1 uniform random variable.
What strikes me is that the loss of the model in the above example is really low and the accuracy is 99% whereas I would expect something in the 50% range.
So I am assuming that the way the model is testing the classification is leaking information somehow.
Does anybody happen to see what's the issue? What would be the proper way to evaluate the accuracy in such scenario?
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from random import randint
SIZE = 100
df = pd.DataFrame({'Temperature': list(range(SIZE)),
'Weight': [randint(1,100) for _ in range(SIZE)],
'Size': [randint(1,10000) for _ in range(SIZE)],
'Property': [randint(0,1) for _ in range(SIZE)]})
df.Property = df.Property.shift(-1)
print ( df.head() )
# parameters
time_steps = 1
inputs = 3
outputs = 2
df = df.iloc[:-1,:]
df = df.values
train_X = df[:, :-1]
train_y = df[:, -1]
scaler = MinMaxScaler(feature_range=(0, 1))
train_X = scaler.fit_transform(train_X)
train_X = train_X[:,None,:]
onehot_encoder = OneHotEncoder()
encode_categorical = train_y.reshape(len(train_y), 1)
train_y = onehot_encoder.fit_transform(encode_categorical).toarray()
learning_rate = 0.001
epochs = 50000
batch_size = int(train_X.shape[0]/2)
length = train_X.shape[0]
display = 100
neurons = 100
tf.reset_default_graph()
X = tf.placeholder(tf.float32, [None, time_steps, inputs])
y = tf.placeholder(tf.float32, [None, outputs])
cell = tf.contrib.rnn.BasicLSTMCell(num_units=neurons, activation=tf.nn.relu)
cell_outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
stacked_outputs = tf.reshape(cell_outputs, [-1, neurons])
out = tf.layers.dense(inputs=stacked_outputs, units=outputs)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
labels=y, logits=out))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
accuracy = tf.metrics.accuracy(labels = tf.argmax(y, 1),
predictions = tf.argmax(out, 1),
name = "accuracy")
precision = tf.metrics.precision(labels=tf.argmax(y, 1),
predictions=tf.argmax(out, 1),
name="precision")
recall = tf.metrics.recall(labels=tf.argmax(y, 1),
predictions=tf.argmax(out, 1),
name="recall")
f1 = 2 * accuracy[1] * recall[1] / ( precision[1] + recall[1] )
with tf.Session() as sess:
tf.global_variables_initializer().run()
tf.local_variables_initializer().run()
for steps in range(epochs):
mini_batch = zip(range(0, length, batch_size),
range(batch_size, length+1, batch_size))
for (start, end) in mini_batch:
sess.run(training_op, feed_dict = {X: train_X[start:end,:,:],
y: train_y[start:end,:]})
if (steps+1) % display == 0:
loss_fn = loss.eval(feed_dict = {X: train_X, y: train_y})
print('Step: {} \tTraining loss: {}'.format((steps+1), loss_fn))
acc, prec, recall, f1 = sess.run([accuracy, precision, recall, f1],
feed_dict = {X: train_X, y: train_y})
print('\nEvaluation on training set')
print('Accuracy:', acc[1])
print('Precision:', prec[1])
print('Recall:', recall[1])
print('F1 score:', f1)

Linear Regression With Manual Gradient Computation

I understand Code 1 is the code for the Linear Regression using tf.train.GradientDescentOptimizer which belong to TensorFlow library(black box).
Code 2 is a code example to do the same thing without GradientDescentOptimizer.
is the code without the black box.
I want to add bias (# hypothesis = X * W + b) in Code 2. In this case, how the code(gradient, descent, update, etc) should be?
Code 1
import tensorflow as tf
x_train = [1, 2, 3]
y_train = [1, 2, 3]
X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)
W = tf.Variable(5.)
b = tf.Variable(5.)
hypothesis = X * W + b
cost = tf.reduce_mean(tf.square(hypothesis - Y))
learning_rate = 0.1
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
gvs = optimizer.compute_gradients(cost, [W, b])
apply_gradients = optimizer.apply_gradients(gvs)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for step in range(21):
gradient_val, cost_val, _ = sess.run(
[gvs, cost, apply_gradients], feed_dict={X: x_train, Y: y_train})
print("%3d Cost: %10s, W': %10s, W: %10s, b': %10s, b: %10s" %
(step, round(cost_val, 5),
round(gradient_val[0][0] * learning_rate, 5), round(gradient_val[0][1], 5),
round(gradient_val[1][0] * learning_rate, 5), round(gradient_val[1][1], 5)))
Code2
import tensorflow as tf
x_train = [1, 2, 3]
y_train = [1, 2, 3]
X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)
W = tf.Variable(5.)
# b = tf.Variable(5.) # Bias
hypothesis = X * W
# hypothesis = X * W + b
cost = tf.reduce_mean(tf.square(hypothesis - Y))
learning_rate = 0.1
gradient = tf.reduce_mean((W * X - Y) * X) * 2
descent = W - learning_rate * gradient
update = tf.assign(W, descent)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
print(sess.run(W))
for step in range(21):
gradient_val, update_val, cost_val = sess.run(
[gradient, update, cost], feed_dict={X: x_train, Y: y_train})
print(step, gradient_val * learning_rate, update_val, cost_val)
I have referred An Introduction to Gradient Descent and Linear Regression
Code 2
import tensorflow as tf
x_train = [1, 2, 3]
y_train = [1, 2, 3]
X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)
W = tf.Variable(5.)
b = tf.Variable(5.)
hypothesis = X * W + b
cost = tf.reduce_mean(tf.square(hypothesis - Y))
learning_rate = 0.1
W_gradient = tf.reduce_mean((W * X + b - Y) * X) * 2
b_gradient = tf.reduce_mean(W * X + b - Y) * 2
W_descent = W - learning_rate * W_gradient
b_descent = b - learning_rate * b_gradient
W_update = tf.assign(W, W_descent)
b_update = tf.assign(b, b_descent)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for step in range(21):
cost_val, W_gradient_val, W_update_val, b_gradient_val, b_update_val = sess.run(
[cost, W_gradient, W_update, b_gradient, b_update],
feed_dict={X: x_train, Y: y_train})
print("%3d Cost: %8s, W': %8s, W: %8s, b': %8s, b: %8s" %
(step, round(cost_val, 5),
round(W_gradient_val * learning_rate, 5), round(W_update_val, 5),
round(b_gradient_val * learning_rate, 5), round(b_update_val, 5)))