Linear Regression With Manual Gradient Computation - tensorflow

I understand that Code 1 implements linear regression using tf.train.GradientDescentOptimizer, which belongs to the TensorFlow library (a black box).
Code 2 is an example that does the same thing without GradientDescentOptimizer,
i.e. it is the code without the black box.
I want to add a bias (# hypothesis = X * W + b) to Code 2. In that case, what should the code (gradient, descent, update, etc.) look like?
Code 1
import tensorflow as tf

# Toy data lying exactly on y = x.
x_train = [1, 2, 3]
y_train = [1, 2, 3]

X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)

# Both parameters start at 5.
W = tf.Variable(5.)
b = tf.Variable(5.)

hypothesis = X * W + b
cost = tf.reduce_mean(tf.square(hypothesis - Y))

learning_rate = 0.1
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
# compute_gradients returns one (gradient, variable) pair per variable,
# in the order given: index 0 is W, index 1 is b.
gvs = optimizer.compute_gradients(cost, [W, b])
apply_gradients = optimizer.apply_gradients(gvs)

sess = tf.Session()
# Variables must be initialized before the first training step.
sess.run(tf.global_variables_initializer())

for step in range(21):
    gradient_val, cost_val, _ = sess.run(
        [gvs, cost, apply_gradients], feed_dict={X: x_train, Y: y_train})
    # W' / b' are the step sizes (gradient * learning rate); W / b are the
    # variable values fetched alongside the gradients.
    print("%3d Cost: %10s, W': %10s, W: %10s, b': %10s, b: %10s" %
          (step, round(cost_val, 5),
           round(gradient_val[0][0] * learning_rate, 5), round(gradient_val[0][1], 5),
           round(gradient_val[1][0] * learning_rate, 5), round(gradient_val[1][1], 5)))
import tensorflow as tf

# Toy data lying exactly on y = x.
x_train = [1, 2, 3]
y_train = [1, 2, 3]

X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)

W = tf.Variable(5.)
# b = tf.Variable(5.) # Bias

hypothesis = X * W
# hypothesis = X * W + b
cost = tf.reduce_mean(tf.square(hypothesis - Y))

learning_rate = 0.1
# d(cost)/dW for cost = mean((W*X - Y)^2)
gradient = tf.reduce_mean((W * X - Y) * X) * 2
descent = W - learning_rate * gradient
update = tf.assign(W, descent)

sess = tf.Session()
# Variables must be initialized before the first update.
sess.run(tf.global_variables_initializer())

for step in range(21):
    gradient_val, update_val, cost_val = sess.run(
        [gradient, update, cost], feed_dict={X: x_train, Y: y_train})
    print(step, gradient_val * learning_rate, update_val, cost_val)

I referred to "An Introduction to Gradient Descent and Linear Regression".
Code 2
import tensorflow as tf

# Toy data lying exactly on y = x.
x_train = [1, 2, 3]
y_train = [1, 2, 3]

X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)

W = tf.Variable(5.)
b = tf.Variable(5.)

hypothesis = X * W + b
cost = tf.reduce_mean(tf.square(hypothesis - Y))

learning_rate = 0.1

# Partial derivatives of cost = mean((W*X + b - Y)^2):
#   d(cost)/dW = 2 * mean((W*X + b - Y) * X)
#   d(cost)/db = 2 * mean(W*X + b - Y)
W_gradient = tf.reduce_mean((W * X + b - Y) * X) * 2
b_gradient = tf.reduce_mean(W * X + b - Y) * 2

W_descent = W - learning_rate * W_gradient
b_descent = b - learning_rate * b_gradient

W_update = tf.assign(W, W_descent)
b_update = tf.assign(b, b_descent)

sess = tf.Session()
# Variables must be initialized before the first update.
sess.run(tf.global_variables_initializer())

for step in range(21):
    cost_val, W_gradient_val, W_update_val, b_gradient_val, b_update_val = sess.run(
        [cost, W_gradient, W_update, b_gradient, b_update],
        feed_dict={X: x_train, Y: y_train})
    # W' / b' are the step sizes (gradient * learning rate); W / b are the
    # values written by the assign ops.
    print("%3d Cost: %8s, W': %8s, W: %8s, b': %8s, b: %8s" %
          (step, round(cost_val, 5),
           round(W_gradient_val * learning_rate, 5), round(W_update_val, 5),
           round(b_gradient_val * learning_rate, 5), round(b_update_val, 5)))


rewrite TensorFlow 1.x to 2.x version

I need to rewrite a Tensorflow 1.x code to 2.x version. So, I rewrote the commented code as follows (the different activations and initializers were modified by myself):
def model(X, nact):
# h = conv(tf.cast(X, tf.float32), nf=32, rf=8, stride=1, init_scale=np.sqrt(2))
h = tf.keras.layers.Conv2D(filters=32,
# h2 = conv(h, nf=64, rf=4, stride=1, init_scale=np.sqrt(2))
h2 = tf.keras.layers.Conv2D(filters=64,
. . .
# pi = fc(h4, nact, act=lambda x: x)
pi = tf.keras.layers.Dense(units=nact,
# vf = fc(h4, 1, act=lambda x: tf.tanh(x))
vf = tf.keras.layers.Dense(units=1,
# filter out non-valid actions from pi
valid = tf.reduce_max(tf.cast(X, tf.float32), axis=1)
valid_flat = tf.reshape(valid, [-1, nact])
pi_fil = pi + (valid_flat - tf.ones(tf.shape(valid_flat))) * 1e32
return pi_fil, vf[:, 0]
A few methods further down, I have the following:
def build_model(args):
    """Build the TF 1.x graph: input placeholders, network heads and total loss.

    Args:
        args: object providing ``max_clause``, ``max_var``, ``n_stack`` and
            ``l2_coeff``.

    Returns:
        Tuple ``(X, Y, Z, p, v, params, loss)``: the observation, policy-target
        and value-target placeholders, the two outputs of ``model``, the list
        of trainable variables, and the scalar training loss.
    """
    nh = args.max_clause
    nw = args.max_var
    nc = 2
    nact = nc * nw
    ob_shape = (None, nh, nw, nc * args.n_stack)

    X = tf.placeholder(tf.float32, ob_shape)
    Y = tf.placeholder(tf.float32, (None, nact))
    Z = tf.placeholder(tf.float32, None)

    p, v = model(X, nact)
    # Collected after model() so the variables it created are included.
    params = tf.trainable_variables()

    with tf.name_scope("loss"):
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=p))
        value_loss = tf.losses.mean_squared_error(labels=Z, predictions=v)
        # L2 penalty summed over every trainable variable.
        lossL2 = tf.add_n([tf.nn.l2_loss(vv) for vv in params])
        loss = cross_entropy + value_loss + args.l2_coeff * lossL2

    return X, Y, Z, p, v, params, loss
def self_play(args, status_track):
X, _, _, p, v, params, _ = build_model(args)
with tf.Session() as sess:
model_dir = status_track.get_model_dir(), os.path.join(args.save_dir, model_dir)))
. . .
def super_train(args, status_track):
X, Y, Z, _, _, params, loss = build_model(args)
with tf.name_scope("train"):
train_step = tf.train.AdamOptimizer(1e-3).minimize(loss)
with tf.Session() as sess:
model_dir = status_track.get_sl_starter(), os.path.join(args.save_dir, model_dir)))
. . .
How can I rewrite these two functions in TensorFlow 2.x, i.e., in a Keras-like style?
Here is a TensorFlow 2.x-compatible code snippet:
def build_model(args):
    """Build the graph through the ``tf.compat.v1`` API under TensorFlow 2.x.

    NOTE: ``tf.compat.v1.placeholder`` only works in graph mode, so
    ``tf.compat.v1.disable_eager_execution()`` must have been called before
    this function runs.

    Args:
        args: object providing ``max_clause``, ``max_var``, ``n_stack`` and
            ``l2_coeff``.

    Returns:
        Tuple ``(X, Y, Z, p, v, params, loss)``: the observation, policy-target
        and value-target placeholders, the two outputs of ``model``, the list
        of trainable variables, and the scalar training loss.
    """
    nh = args.max_clause
    nw = args.max_var
    nc = 2
    nact = nc * nw
    ob_shape = (None, nh, nw, nc * args.n_stack)

    X = tf.compat.v1.placeholder(tf.float32, ob_shape)
    Y = tf.compat.v1.placeholder(tf.float32, (None, nact))
    Z = tf.compat.v1.placeholder(tf.float32, None)

    p, v = model(X, nact)
    # Collected after model() so the variables it created are included.
    params = tf.compat.v1.trainable_variables()

    with tf.name_scope("loss"):
        cross_entropy = tf.reduce_mean(
            tf.compat.v1.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=p))
        # The compat.v1 loss keeps the TF 1.x (labels=, predictions=) keywords
        # and returns a scalar. tf.keras.metrics.mean_squared_error takes
        # (y_true, y_pred) and does not accept those keyword names.
        value_loss = tf.compat.v1.losses.mean_squared_error(labels=Z, predictions=v)
        # L2 penalty summed over every trainable variable.
        lossL2 = tf.add_n([tf.compat.v1.nn.l2_loss(vv) for vv in params])
        loss = cross_entropy + value_loss + args.l2_coeff * lossL2

    return X, Y, Z, p, v, params, loss

WGAN-GP Large Oscillating Loss

I am trying to train a WaveGAN as described here:
In the paper, the WaveGAN is trained using WGAN-GP, so I have tried to implement it myself by adapting code from: However, after even only 2000 steps (~1 epoch), the loss values I am getting for the critic and the generator are large (< 1000) and oscillate between negative and positive. My audio is the same piano recordings that they used, just resampled at 16000Hz and converted to mono from stereo.
My loss graphs are:
I was hoping someone could please validate whether my implementation is correct and if so, what experiments can I run to diagnose this problem?
Note: TIMESTEPS indicates the number of samples I wish to generate for each generator pass. Currently this is set to 1 to replicate WaveGAN, and I wish to experiment with this in the future. For now, I don't think it is relevant to the issue.
My script is:
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
import numpy as np
import librosa
import random
import os
import sys
import time
import GANModels
gpus = tf.config.experimental.list_physical_devices('GPU')
# Enable memory growth on every visible GPU. Looping (instead of indexing
# gpus[0]) keeps this a no-op on machines without a GPU.
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
EPOCHS = 2000
Fs = 16000
class GAN:
    """WGAN-GP training harness around a generator ``G`` and a critic ``D``.

    Both networks are built by ``GANModels``; each has its own Adam optimizer.
    """

    def __init__(self, model_dims=MODEL_DIMS, num_samples=NUM_SAMPLES, timesteps=TIMESTEPS,
                 gradient_penalty_weight=GRADIENT_PENALTY_WEIGHT,
                 noise_len=NOISE_LEN, batch_size=BATCH_SIZE, sr=Fs):
        """Create the two networks and their optimizers.

        Args:
            model_dims: forwarded to ``GANModels.Generator`` and ``GANModels.Critic``.
            num_samples: forwarded to both model constructors.
            timesteps: forwarded to both constructors; also the first axis of
                the latent-noise shape.
            gradient_penalty_weight: multiplier applied to the gradient penalty
                in the critic loss.
            noise_len: size of the last axis of the latent noise.
            batch_size: number of latent vectors drawn per generator step.
            sr: sample rate used when writing generated audio to disk.
        """
        self.model_dims = model_dims
        self.num_samples = num_samples
        self.timesteps = timesteps
        self.noise_dims = (timesteps, noise_len)
        self.batch_size = batch_size
        self.G = GANModels.Generator(self.model_dims, self.timesteps, num_samples)
        self.D = GANModels.Critic(self.model_dims, self.timesteps, num_samples)
        self.G_optimizer = Adam(learning_rate=1e-4, beta_1=0.5, beta_2=0.9)
        self.D_optimizer = Adam(learning_rate=1e-4, beta_1=0.5, beta_2=0.9)
        # Kept as two separate attributes: a chained assignment
        # (`... = gradient_penalty_weight = sr`) would silently set the
        # penalty weight to the sample rate.
        self.gradient_penalty_weight = gradient_penalty_weight
        self.sr = sr

    def _d_loss_fn(self, r_logit, f_logit):
        """Return the critic's Wasserstein terms ``(-mean(real), mean(fake))``."""
        r_loss = - tf.reduce_mean(r_logit)
        f_loss = tf.reduce_mean(f_logit)
        return r_loss, f_loss

    def _g_loss_fn(self, f_logit):
        """Return the generator loss ``-mean(critic(fake))``."""
        f_loss = - tf.reduce_mean(f_logit)
        return f_loss

    def _gradient_penalty(self, real, fake):
        """Return ``mean((||grad_x D(x)|| - 1)^2)`` at random real/fake interpolates."""
        def _interpolate(a, b):
            # One alpha per example, broadcast over all remaining axes.
            shape = [tf.shape(a)[0]] + [1] * (a.shape.ndims - 1)
            alpha = tf.random.uniform(shape=shape, minval=0., maxval=1.)
            inter = a + alpha * (b - a)
            return inter

        x = _interpolate(real, fake)
        with tf.GradientTape() as t:
            # x is a plain tensor, not a variable, so the tape only tracks it
            # when asked to; without this t.gradient(pred, x) returns None.
            t.watch(x)
            pred = self.D(x, training=True)
        grad = t.gradient(pred, x)
        # Per-example L2 norm over all non-batch axes.
        norm = tf.norm(tf.reshape(grad, [tf.shape(grad)[0], -1]), axis=1)
        gp = tf.reduce_mean((norm - 1.)**2)
        return gp

    def train_G(self):
        """Run one generator update and return ``{'g_loss': ...}``."""
        with tf.GradientTape() as t:
            z = tf.random.normal(shape=(self.batch_size,) + self.noise_dims)
            x_fake = self.G(z, training=True)
            x_fake_d_logit = self.D(x_fake, training=True)
            G_loss = self._g_loss_fn(x_fake_d_logit)
        G_grad = t.gradient(G_loss, self.G.trainable_variables)
        self.G_optimizer.apply_gradients(zip(G_grad, self.G.trainable_variables))
        return {'g_loss': G_loss}

    def train_D(self, x_real):
        """Run one critic update on ``x_real``.

        Returns:
            ``{'d_loss': ..., 'gp': ...}`` where ``d_loss`` is the Wasserstein
            part of the loss (without the penalty) and ``gp`` is the
            unweighted gradient penalty.
        """
        with tf.GradientTape() as t:
            # As many fake examples as real ones.
            z = tf.random.normal(shape=(x_real.shape[0],) + self.noise_dims)
            x_fake = self.G(z, training=True)
            x_real_d_logit = self.D(x_real, training=True)
            x_fake_d_logit = self.D(x_fake, training=True)
            x_real_d_loss, x_fake_d_loss = self._d_loss_fn(x_real_d_logit, x_fake_d_logit)
            # Computed inside the outer tape so the penalty contributes to the
            # critic's gradients.
            gp = self._gradient_penalty(x_real, x_fake)
            D_loss = (x_real_d_loss + x_fake_d_loss) + gp * self.gradient_penalty_weight
        D_grad = t.gradient(D_loss, self.D.trainable_variables)
        self.D_optimizer.apply_gradients(zip(D_grad, self.D.trainable_variables))
        return {'d_loss': x_real_d_loss + x_fake_d_loss, 'gp': gp}

    def sample(self, epoch, num_samples=10):
        """Generate ``num_samples`` clips and write them to ``output/piano/``."""
        z = tf.random.normal(shape=(num_samples,) + self.noise_dims)
        result = self.G(z, training=False)
        for i in range(num_samples):
            audio = np.array(result[i, :, :])
            # NOTE(review): librosa.output was removed in librosa 0.8 —
            # confirm the installed version still provides write_wav.
            librosa.output.write_wav(f"output/piano/{epoch}-{i}.wav", audio,
                                     sr=self.sr)
gan = GAN()
# Cut every training file into fixed-length (chunk_len, 1) float arrays.
X_train = []
train_dir = r"D:\ML_Datasets\mancini_piano\piano\train"
chunk_len = TIMESTEPS * NUM_SAMPLES
for file_name in os.listdir(train_dir):
    with open(os.path.join(train_dir, file_name), "rb") as f:
        samples, _ = librosa.load(f, Fs)
    if len(samples) < chunk_len:
        # Recording shorter than one chunk: zero-pad it to full length.
        audio = samples.reshape(-1, 1)
        padding = np.zeros(shape=(chunk_len - len(samples), 1), dtype='float32')
        X_train.append(np.append(audio, padding, axis=0))
    # Consecutive non-overlapping chunks; a trailing remainder shorter than
    # chunk_len is dropped. Slicing replaces the per-sample list building and
    # repeated np.delete, which re-copied the remaining audio for every chunk.
    for k in range(len(samples) // chunk_len):
        start = k * chunk_len
        X_train.append(samples[start:start + chunk_len].reshape(-1, 1).copy())
print(f"X_train shape = {(len(X_train),) + X_train[0].shape}")
# Write the first chunk back out as a sanity check of the pipeline.
librosa.output.write_wav("output/piano/test.wav", X_train[0], sr=Fs)
train_summary_writer = tf.summary.create_file_writer("logs/train")
with train_summary_writer.as_default():
steps_per_epoch = len(X_train) // BATCH_SIZE
for e in range(EPOCHS):
for i in range(steps_per_epoch):
D_loss_sum = 0
for n in range(D_UPDATES_PER_G_UPDATE):
D_loss_dict = gan.train_D(np.array(random.sample(X_train, BATCH_SIZE)))
D_loss_sum += D_loss_dict['d_loss']
D_loss = D_loss_sum / D_UPDATES_PER_G_UPDATE
G_loss_dict = gan.train_G()
G_loss = G_loss_dict['g_loss']
tf.summary.scalar('d_loss', D_loss, step=(e*steps_per_epoch)+i)
tf.summary.scalar('g_loss', G_loss, step=(e*steps_per_epoch)+i)
print(f"step {(e*steps_per_epoch)+i}: d_loss = {D_loss} g_loss = {G_loss}")
if e % EPOCHS_PER_SAMPLE == 0:
My GANModels module is:
def Generator(d, a, num_samples, c=16):
    """Build the generator as a Keras functional ``Model``.

    A dense projection of a 100-dimensional latent vector is reshaped to
    ``(c, c*d)`` and passed through five stride-4 transposed convolutions.
    Each 1-D transposed convolution is emulated with ``Conv2DTranspose`` on a
    temporarily inserted axis of size 1.

    Args:
        d: channel multiplier.
        a: unused here; kept for signature parity with ``Critic``.
        num_samples: unused here; kept for signature parity with ``Critic``.
        c: initial sequence length / channel factor, halved before each of
            the first four upsampling stages.

    Returns:
        A Keras ``Model`` mapping the latent input to a tanh-activated output.
    """
    def _upsample(x, filters):
        # Conv1DTranspose emulation: add a dummy axis, run the 2-D op with a
        # (1, 25) kernel and (1, 4) stride, then drop the dummy axis again.
        x = Lambda(lambda t: K.expand_dims(t, axis=1))(x)
        x = Conv2DTranspose(filters, (1, 25), strides=(1, 4), padding='same')(x)
        return Lambda(lambda t: t[:, 0])(x)

    # Prelim layers
    input_layer = Input(shape=(100,))
    x = Dense(256*d, input_shape=(100,))(input_layer)
    x = Reshape((c, c*d))(x)
    x = Activation('relu')(x)

    # WaveCNN layers: four ReLU upsampling stages with halving channel factor.
    for _ in range(4):
        c //= 2
        x = Activation('relu')(_upsample(x, c*d))

    # Final stage maps to a single channel squashed by tanh.
    x = Activation('tanh')(_upsample(x, 1))

    model = Model(inputs=input_layer, outputs=x)
    return model
def _apply_phaseshuffle(x, rad=2, pad_type='reflect'):
    """Shift ``x`` along its second axis by a random offset in ``[-rad, rad]``.

    The tensor is padded on one side by ``|phase|`` steps using ``pad_type``
    and then cropped back to its original length, so the output shape equals
    the input shape.

    Args:
        x: rank-3 tensor; the shift is applied along axis 1.
        rad: maximum absolute shift.
        pad_type: padding mode passed to ``tf.pad``.

    Returns:
        The shifted tensor, with its static shape restored.
    """
    b, x_len, nch = x.get_shape().as_list()

    # maxval is exclusive, hence rad + 1.
    phase = tf.random.uniform([], minval=-rad, maxval=rad + 1, dtype=tf.int32)
    pad_l = tf.maximum(phase, 0)
    pad_r = tf.maximum(-phase, 0)
    phase_start = pad_r
    x = tf.pad(x, [[0, 0], [pad_l, pad_r], [0, 0]], mode=pad_type)

    x = x[:, phase_start:phase_start+x_len]
    # The dynamic slice loses the static shape; put it back.
    x.set_shape([b, x_len, nch])

    return x
def Critic(d, a, num_samples, c=1):
    """Build the critic as a Keras functional ``Model``.

    Five stride-4 ``Conv1D`` stages, each followed by LeakyReLU(0.2) and phase
    shuffle, with the channel factor ``c`` doubling between stages, then a
    single-unit dense layer.

    Args:
        d: channel multiplier.
        a: with ``num_samples``, defines the input length ``a * num_samples``.
        num_samples: see ``a``.
        c: initial channel factor.

    Returns:
        A Keras ``Model`` mapping ``(a*num_samples, 1)`` inputs to one score.
    """
    input_layer = Input(shape=(a*num_samples, 1))

    x = input_layer
    for stage in range(5):
        if stage > 0:
            c *= 2
        x = Conv1D(c*d, 25, strides=4, padding='same')(x)
        x = LeakyReLU(alpha=0.2)(x)
        x = Lambda(lambda t: _apply_phaseshuffle(t))(x)

    # NOTE(review): only the first time step is kept here, while the Dense
    # layer below declares input_shape=(256*d,) — confirm whether flattening
    # all time steps was intended.
    x = Lambda(lambda t: t[:, 0])(x)
    dense_layer1 = Dense(1, input_shape=(256*d,))(x)

    model = Model(inputs=input_layer, outputs=dense_layer1)
    return model

How to print out prediction value in tensorflow

I am new to TensorFlow and I am a slow learner. After successfully compiling the model and getting the accuracy, I want to print the prediction variable, but I don't know how to do it.
My dataset has multivariate features with only one output. The output contains only 1, 0 and -1, so I one-hot encoded it. I finished compiling the model and searched online for how to compute predictions in TensorFlow, but I didn't find a good solution for my question.
The precisionCalculate function computes the precision for each column of the test data, since train_y and test_y become [1,0,0], [0,1,0], [0,0,1] after one-hot encoding.
I have tried
y_pred = sess.run(tf.argmax(y, 1), feed_dict={X: test_x, y: test_y})
but it turns out that y_pred is exactly the same as my test_y.
Here is my full code example.
import tensorflow as tf
import pandas as pd
import numpy as np
import tensorflow.contrib.rnn
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, LabelEncoder
import pdb
def precisionCalculate(pred_y, test_y):
    """Return the precision of class 0 and of class 2.

    Precision of a class is the fraction of samples predicted as that class
    whose true label is also that class.

    Args:
        pred_y: array-like of predicted integer labels.
        test_y: array-like of true integer labels, same shape as ``pred_y``.

    Returns:
        Tuple ``(precision1, precision3)`` for labels 0 and 2. An entry is
        ``nan`` when that label is never predicted (the ratio is undefined),
        instead of raising ``ZeroDivisionError``.
    """
    pred_y = np.asarray(pred_y)
    test_y = np.asarray(test_y)

    def _precision(label):
        predicted = pred_y == label
        n_predicted = int(np.count_nonzero(predicted))
        if n_predicted == 0:
            return float("nan")
        n_correct = int(np.count_nonzero(predicted & (test_y == label)))
        return n_correct / n_predicted

    precision1 = _precision(0)
    precision3 = _precision(2)
    return precision1, precision3
df = pd.read_csv('new_df.csv', skiprows=[0], header=None)
df.drop(columns=[0,1], inplace=True)
df.columns = [np.arange(0, df.shape[1])]
# Column 0 is the target; shifting it up one row pairs each row's features
# with the next row's label.
df[0] = df[0].shift(-1)
time_steps = 1
inputs = df.shape[1]
outputs = 3
#remove nan as a result of shift values
df = df.iloc[:-1, :]
#convert to numpy
df = df.values
train_number = 30276 #start date from 1018
#x y split
train_x = df[: train_number, 1:]
test_x = df[train_number:, 1:]
train_y = df[:train_number, 0]
test_y = df[train_number:, 0]
#data pre-processing
scaler = MinMaxScaler(feature_range=(0,1))
train_x = scaler.fit_transform(train_x)
# Reuse the scaling learned on the training split; re-fitting on the test
# split would scale the two sets differently.
test_x = scaler.transform(test_x)
#reshape into 3d array
train_x = train_x[:, None, :]
test_x = test_x[:, None, :]
#one-hot encode the outputs
onehot_encoder = OneHotEncoder()
#encoder = LabelEncoder()
# Map labels to non-negative values with max - y, using the training maximum
# for both splits so a given label gets the same code in each.
max_ = train_y.max()
train_y = (train_y - max_) * (-1)
test_y = (test_y - max_) * (-1)
encode_categorical = train_y.reshape(len(train_y), 1)
encode_categorical2 = test_y.reshape(len(test_y), 1)
train_y = onehot_encoder.fit_transform(encode_categorical).toarray()
# Transform only: the category-to-column mapping must match the training set.
test_y = onehot_encoder.transform(encode_categorical2).toarray()
print(train_x.shape, train_y.shape, test_x.shape, test_y.shape)
#model parameters
learning_rate = 0.001
epochs = 100
batch_size = train_x.shape[0] // 10
length = train_x.shape[0]
display = 100
neurons = 100
X = tf.placeholder(tf.float32, [None, time_steps, 90],name='x')
y = tf.placeholder(tf.float32, [None, outputs],name='y')
#LSTM cell
cell = tf.contrib.rnn.BasicLSTMCell(num_units = neurons, activation = tf.nn.relu)
cell_outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
# pass into Dense layer
stacked_outputs = tf.reshape(cell_outputs, [-1, neurons])
out = tf.layers.dense(inputs=stacked_outputs, units=outputs)
# softmax cross-entropy loss for the 3-class classification
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=out, labels=y))
# optimizer to minimize cost
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
# Each tf.metrics.* call returns a (value, update_op) pair; index 1 is the
# update op, which also yields the updated metric value.
accuracy = tf.metrics.accuracy(labels = tf.argmax(y, 1), predictions = tf.argmax(out, 1), name = "accuracy")
precision = tf.metrics.precision(labels=tf.argmax(y, 1), predictions=tf.argmax(out, 1), name="precision")
recall = tf.metrics.recall(labels=tf.argmax(y, 1), predictions=tf.argmax(out, 1),name="recall")
# F1 is the harmonic mean of precision and recall (not accuracy and recall).
f1 = 2 * precision[1] * recall[1] / ( precision[1] + recall[1] )
with tf.Session() as sess:
    # initialize all variables; tf.metrics.* keep their counters in local
    # variables, so those need initializing as well
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    # Train the model
    for steps in range(epochs):
        mini_batch = zip(range(0, length, batch_size), range(batch_size, length+1, batch_size))
        # train data in mini-batches
        for (start, end) in mini_batch:
            sess.run(training_op, feed_dict = {X: train_x[start:end,:,:], y: train_y[start:end,:]})
        # print training performance
        if (steps+1) % display == 0:
            # evaluate loss function on training set
            loss_fn = loss.eval(feed_dict = {X: train_x, y: train_y})
            print('Step: {} \tTraining loss: {}'.format((steps+1), loss_fn))
    # evaluate model accuracy
    # Results go into fresh names so the `recall` and `f1` graph tensors are
    # not overwritten by their fetched values.
    acc, prec, rec, f1_val = sess.run([accuracy, precision, recall, f1], feed_dict = {X: test_x, y: test_y})
    # Predictions come from the model output `out`, not the label placeholder,
    # and are taken on the test set so they line up with test_y below.
    y_pred = sess.run(tf.argmax(out, 1), feed_dict={X: test_x})
    test_y_alter = np.argmax(test_y, axis=1)
    print(precisionCalculate(y_pred, test_y_alter))
    print('\nEvaluation on test set')
    print('Accuracy:', acc[1])
    print('Precision:', prec[1])
    print('Recall:', rec[1])
    print('F1 score:', f1_val)
I think you should use the output of your model instead of the label (y) in tf.argmax.
Here is my code in order to print prediction of the model:
# tf.Print returns its first argument and logs the listed tensors whenever it is evaluated.
pred_y = tf.Print(tf.argmax(score, 1), [tf.argmax(score, 1)], message="prediction:")
In the above code, score means the probability output of your model.

Implementing LSTM regression model with tensor flow

I am trying to implement a TensorFlow LSTM regression model for a list of input numbers.
input_data = [1, 2, 3, 4, 5]
time_steps = 2
-> X == [[1, 2], [2, 3], [3, 4]]
-> y == [3, 4, 5]
The code is below:
Xd, yd = load_data()
train_input = Xd['train']
train_input = train_input.reshape(-1,20,1)
train_output = yd['train']
# train_input = [[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20],..
# train_output = [[21],[22],[23]....
# Same layout as the training input so it matches the placeholder below
# (a no-op if the data already has that shape).
test_input = Xd['test'].reshape(-1,20,1)
test_output = yd['test']
X = tf.placeholder(tf.float32, [None, 20, 1])
y = tf.placeholder(tf.float32, [None, 1])
cell = tf.nn.rnn_cell.LSTMCell(num_hidden, state_is_tuple=True)
val, state = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
val = tf.Print(val, [tf.argmax(val,1)], 'argmax(val)=' , summarize=20, first_n=7)
# Make time the leading axis so the last step can be picked with gather.
val = tf.transpose(val, [1, 0, 2])
val = tf.Print(val, [tf.argmax(val,1)], 'argmax(val2)=' , summarize=20, first_n=7)
# Take only the last output after 20 time steps
last = tf.gather(val, int(val.get_shape()[0]) - 1)
last = tf.Print(last, [tf.argmax(last,1)], 'argmax(val3)=' , summarize=20, first_n=7)
# define variables for weights and bias
weight = tf.Variable(tf.truncated_normal([num_hidden, int(y.get_shape()[1])]))
bias = tf.Variable(tf.constant(0.1, shape=[y.get_shape()[1]]))
# Prediction is matmul of last value and weight, plus bias
prediction = tf.matmul(last, weight) + bias
# Mean squared error: this is a regression on an unbounded real value, so a
# cross-entropy cost (which takes log(prediction)) does not apply.
cost = tf.reduce_mean(tf.square(prediction - y))
optimizer = tf.train.AdamOptimizer()
minimize = optimizer.minimize(cost)
from tensorflow.python import debug as tf_debug
inita = tf.global_variables_initializer()
sess = tf.Session()
sess.run(inita)
batch_size = 100
no_of_batches = int(len(train_input)/batch_size)
epoch = 10
test_size = 100
for i in range(epoch):
    for start, end in zip(range(0, len(train_input), batch_size), range(batch_size, len(train_input)+1, batch_size)):
        sess.run(minimize, feed_dict={X: train_input[start:end], y: train_output[start:end]})
    test_indices = np.arange(len(test_input)) # Get A Test Batch
    test_indices = test_indices[0:test_size]
    # Compare the real-valued targets directly; argmax over a single-column
    # target is always 0.
    print (i, mean_squared_error(test_output[test_indices], sess.run(prediction, feed_dict={X: test_input[test_indices]})))
print ("predictions", prediction.eval(feed_dict={X: train_input}, session=sess))
y_pred = prediction.eval(feed_dict={X: test_input}, session=sess)
test_size = test_output.shape[0]
ax = np.arange(0, test_size, 1)
plt.plot(ax, test_output, 'r', ax, y_pred, 'b')
But I am not able to minimize the cost; the calculated MSE increases at each step instead of decreasing.
I suspect there is a problem with the cost function that I am using.
Any thoughts or suggestions as to what I am doing wrong?
As mentioned in the comment, you had to change your loss function to the MSE function and reduce your learning rate. Is your error converging to zero ?

tensorflow error occur on tf.matmul

I get an error on line 13, y = tf.matmul(W, x_data) + b, in the code below,
and I can't understand the reason.
import tensorflow as tf
import numpy as np

# 100 random 2-D points; targets follow y = 0.1*x0 + 0.2*x1 + 0.3.
x_data = np.float32(np.random.rand(2, 100))
y_data = np.dot([0.100, 0.200], x_data) + 0.300

b = tf.Variable(tf.zeros([1]))
W = tf.Variable(tf.random_uniform([1, 2], -1.0, 1.0))
# (1, 2) x (2, 100) -> (1, 100)
y = tf.matmul(W, x_data) + b

loss = tf.reduce_mean(tf.square(y - y_data))
optimizer = tf.train.GradientDescentOptimizer(0.5)
train = optimizer.minimize(loss)

init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)

for step in range(0, 201):
    sess.run(train)
    if step % 20 == 0:
        # Formatted explicitly so the output is the same under Python 2 and 3.
        print("%s %s %s" % (step, sess.run(W), sess.run(b)))
#print "xdata=", x_data
#print "ydata=", y_data