Tensorflow does not train CIFAR - 100 data - numpy

I am trying to build a linear classifier with CIFAR - 100 using TensorFlow. I got the code from Martin Gorner's MNIST tutorial and change a bit. When I run this code, tensorflow does not training (code is running but accuracy remains 1.0 and loss(cross entropy remains as 4605.17), I don't know what is wrong, I am actually newbie to TF any help is appreciated.
import pickle
import numpy as np
import os
import tensorflow as tf
from tensorflow.python.framework import tensor_util
import math
#imports data
def unpickle(file):
import pickle
with open(file, 'rb') as fo:
dict = pickle.load(fo, encoding='bytes')
return dict
cifar100_test = {}
cifar100_train = {}
labelMap = {}
labelNames = {}
# Load the raw CIFAR-10 data.
cifar100_test = unpickle('dataset/cifar-100-python/test')
cifar100_train = unpickle('dataset/cifar-100-python/train')
labelMap = unpickle('dataset/cifar-100-python/meta')
#tr for training data and te for testing data, X is data, Y is label
Xtr = cifar100_train[b'data']
Yr = cifar100_train[b'fine_labels']
Xte = cifar100_test[b'data']
Ye = cifar100_test[b'fine_labels']
classNames = labelMap[b'fine_label_names']
num_train = Xtr.shape[0]
num_test = Xte.shape[0]
num_class = len(classNames)
Ytr = np.zeros([num_train, num_class])
Yte = np.zeros([num_test, num_class])
Ytr[0:num_train, Yr[0:num_train]] = 1
Yte[0:num_test, Ye[0:num_test]] = 1
# As a sanity check, we print out the size of the training and test data.
print('Train data shape:', Xtr.shape)
print('Train Label shape:', Ytr.shape)
print('Test data shape:', Xte.shape)
print('Test Label shape:', Yte.shape)
print('Name of Predicted Class:', classNames[0]) #indice of the label name is the indice of the class.
Xtrain = Xtr#[:1000]
Xtest = Xte#[:100]
Ytrain = Ytr#[:1000]
Ytest = Yte#[:100]
print('Train data shape:', Xtrain.shape)
print('Train Label shape:', Ytrain.shape)
print('Test data shape:', Xtest.shape)
print('Test Label shape:', Ytest.shape)
Xtrain = np.reshape(Xtrain,(50000, 32, 32, 3)).transpose(0,1,2,3).astype(float)
Xtest = np.reshape(Xtest,(10000, 32, 32, 3)).transpose(0,1,2,3).astype(float)
Xbatches = np.split(Xtrain, 500); #second number is # of batches
Ybatches = np.split(np.asarray(Ytrain), 500);
XtestB = np.split(Xtest, 100);
YtestB = np.split(Ytest, 100);
print('X # of batches:', len(Xbatches))
print('Y # of batches:', len(Ybatches))
# input X: 28x28 grayscale images, the first dimension (None) will index the images in the mini-batch
X = tf.placeholder(tf.float32, [100, 32, 32, 3])
# correct answers will go here
Y_ = tf.placeholder(tf.float32, [100, 100])
# weights W[784, 10] 784=28*28
W = tf.Variable(tf.zeros([3072, 100]))
# biases b[10]
b = tf.Variable(tf.zeros([100]))
# flatten the images into a single line of pixels
# -1 in the shape definition means "the only possible dimension that will preserve the number of elements"
XX = tf.reshape(X, [-1, 3072])
# The model
Y = tf.nn.softmax(tf.matmul(XX, W) + b)
# loss function: cross-entropy = - sum( Y_i * log(Yi) )
# Y: the computed output vector
# Y_: the desired output vector
# cross-entropy
# log takes the log of each element, * multiplies the tensors element by element
# reduce_mean will add all the components in the tensor
# so here we end up with the total cross-entropy for all images in the batch
cross_entropy = -tf.reduce_mean(Y_ * tf.log(Y)) * 1000.0 # normalized for batches of 100 images,
# *10 because "mean" included an unwanted division by 10
# accuracy of the trained model, between 0 (worst) and 1 (best)
correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# training, learning rate = 0.005
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
# init
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
for i in range(500):
# the backpropagation training step
t, Loss = sess.run([train_step, cross_entropy], feed_dict={X: Xbatches[i], Y_: Ybatches[i]})
print(Loss)
print(i)
for i in range(100):
print('accuracy:', sess.run(accuracy, feed_dict={X: XtestB[i], Y_: YtestB[i]}))

You compute the accuracy a hundred times after the training process is completed. Nothing will change there. You should place your print('accuracy:'....) within the for loop in which you perform the backpropagation:
for i in range(500):
# the backpropagation training step
t, Loss = sess.run([train_step, cross_entropy], feed_dict={X: Xbatches[i], Y_: Ybatches[i]})
print(Loss)
print(i)
print('accuracy:', sess.run(accuracy, feed_dict={X: XtestB[i], Y_: YtestB[i]}))

Sorry for the post it turns out that it is a basic mistake.
I changed following;
Ytr[0:num_train, Yr[0:num_train]] = 1
Yte[0:num_test, Ye[0:num_test]] = 1
with
Ytr[range(num_train), Yr_temp[range(num_train)]] = 1
Yte[range(num_test), Ye_temp[range(num_test)]] = 1
First one make all values 1, but I just wanted to make indice of the true class 1 and other elements 0. Thanks for your time.

Related

TensorFlow Linear Regression gives 'NaN' result

I am currently running the TensorFlow model with Linear Regression. However, I don't understand why, even when I decrease the learning_rate from 0.01 to 0.001 and increase the training iterations from 1000 to 50000, I still obtain the 'nan' result for the cost function, as well as the two coefficients. Could anyone please help me detect the problem in the following code?
from __future__ import print_function
import tensorflow as tf
import numpy
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
import random
rng = numpy.random
# Parameters
learning_rate = 0.001
training_epochs = 20000 #number of iterations
display_step = 400
#read csv file
datapath = [directory path]
Ha_Noi = pd.read_csv(datapath+"HaNoi_1month_LW_WeatherTest.csv")
#Add an additional column into the table
sLength = len(Ha_Noi['accept_rate'])
Ha_Noi['accept_rate_timeT'] = pd.Series(Ha_Noi['accept_rate'], index=Ha_Noi.index)
#Shift the entries in the accept_rate column upward
Ha_Noi.accept_rate = Ha_Noi.accept_rate.shift(-1)
Ha_Noi = Ha_Noi.dropna(subset = ["longwait_percent4"])
Ha_Noi = Ha_Noi.dropna(subset=["accept_rate"])
Ha_Noi = Ha_Noi.dropna(subset = ["longwait_percent2"])
df2 = pd.DataFrame(Ha_Noi)
#split the dataset into training and testing sets
train_set, test_set = train_test_split(Ha_Noi, test_size=0.2, random_state = random.randint(20, 200))
Xtrain = train_set['longwait_percent2'].reshape(-1,1)
Ytrain = train_set['accept_rate'].reshape(-1,1)
Xtrain2 = train_set['Weather Weight_Longwait_percent2'].reshape(-1,1)
Xtest2 = test_set['Weather Weight_Longwait_percent2'].reshape(-1,1)
# Xtest = test_set['longwait_percent2'].reshape(-1,1)
# Ytest = test_set['accept_rate'].reshape(-1,1)
# Training Data
train_X = Xtrain
train_Y = Ytrain
n_samples = train_X.shape[0]
#Testing Data
Xtest = np.asarray(test_set['longwait_percent2'])
Ytest = np.asarray(test_set['accept_rate'])
# tf Graph Input
X = tf.placeholder("float")
Y = tf.placeholder("float")
# Set model weights
W = tf.Variable(rng.randn(), name="weight")
b = tf.Variable(rng.randn(), name="bias")
# Construct a linear model
pred = tf.add(tf.multiply(X, W), b)
# Mean squared error
cost = tf.sqrt(tf.reduce_sum(tf.pow(pred-Y, 2))/(n_samples))
# Gradient descent method
# Note, minimize() knows to modify W and b because Variable objects are "trained" (trainable=True by default)
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# Initializing the variables
init = tf.global_variables_initializer()
saver = tf.train.Saver() #save all the initialized data
# Launch the graph
with tf.Session() as sess:
sess.run(init)
# Fit all training data
for epoch in range(training_epochs):
for (x, y) in zip(train_X, train_Y):
sess.run(optimizer, feed_dict={X: x, Y: y})
# Display logs per epoch step
if (epoch+1) % display_step == 0: # checkpoint every 50 epochs
c = sess.run(cost, feed_dict={X: train_X, Y:train_Y})
print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(c), \
"W=", sess.run(W), "b=", sess.run(b))
print("Optimization Finished!")
training_cost = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n')
# Graphic display
plt.plot(train_X, train_Y, 'ro', label='Original data')
plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line')
plt.legend()
plt.show()
testing_cost = sess.run(
tf.reduce_sum(tf.pow(pred - Y, 2)) / (Xtest.shape[0]),
feed_dict={X: Xtest, Y: Ytest}) # square root of function cost above
print("Root Mean Square Error =", tf.sqrt(testing_cost))
print("Absolute mean square loss difference:", abs(
training_cost - testing_cost))
plt.plot(Xtest, Ytest, 'bo', label='Testing data')
plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line')
plt.legend()
plt.show()
Don't have your data, so it's hard to tell whether the problem is caused by data or by training problem. You can make learning rate and training iteration much smaller such 0.00005 and 100 to see is there still NaN.

debugging 'TypeError: Can not convert a ndarray into a Tensor or Operation' for CNN

I am trying to build a CNN, I have 8 classes in the input_samples with 45 samples in each class. so total number of input samples are 360. I have divided my first 20 samples as train samples and remaining 25 samples as test samples in each class (My input is a text file and the data is in rows is my preprocessed data, so I am reading the rows in textfile and reshaping the images which are 16x12 size).
I am unable to fix the error in the code
My code:
import numpy as np
import random
import tensorflow as tf
folder = 'D:\\Lab_Project_Files\\TF\\Practice Files\\'
Datainfo = 'dataset_300.txt'
ClassInfo = 'classTrain.txt'
INPUT_WIDTH = 16
IMAGE_HEIGHT = 12
IMAGE_DEPTH = 1
IMAGE_PIXELS = INPUT_WIDTH * IMAGE_HEIGHT # 192 = 12*16
NUM_CLASSES = 8
STEPS = 500
STEP_VALIDATE = 100
BATCH_SIZE = 5
def load_data(file1,file2,folder):
filename1 = folder + file1
filename2 = folder + file2
# loading the data file
x_data = np.loadtxt(filename1, unpack=True)
x_data = np.transpose(x_data)
# loading the class information of the data loaded
y_data = np.loadtxt(filename2, unpack=True)
y_data = np.transpose(y_data)
# divide the data in to test and train data
x_data_train = x_data[np.r_[0:20, 45:65, 90:110, 135:155, 180:200, 225:245, 270:290, 315:335],:]
x_data_test = x_data[np.r_[20:45, 65:90, 110:135, 155:180, 200:225, 245:270, 290:315, 335:360], :]
y_data_train = y_data[np.r_[0:20, 45:65, 90:110, 135:155, 180:200, 225:245, 270:290, 315:335]]
y_data_test = y_data[np.r_[20:45, 65:90, 110:135, 155:180, 200:225, 245:270, 290:315, 335:360],:]
return x_data_train,x_data_test,y_data_train,y_data_test
def reshapedata(data_train,data_test):
data_train = np.reshape(data_train, (len(data_train),INPUT_WIDTH,IMAGE_HEIGHT))
data_test = np.reshape(data_test, (len(data_test), INPUT_WIDTH, IMAGE_HEIGHT))
return data_train,data_test
def batchdata(data,label, batchsize):
# generate random number required to batch data
order_num = random.sample(range(1, len(data)), batchsize)
data_batch = []
label_batch = []
for i in range(len(order_num)):
data_batch.append(data[order_num[i-1]])
label_batch.append(label[order_num[i-1]])
return data_batch, label_batch
# CNN trail
def conv_net(x):
weights = tf.Variable(tf.random_normal([INPUT_WIDTH * IMAGE_HEIGHT * IMAGE_DEPTH, NUM_CLASSES]))
biases = tf.Variable(tf.random_normal([NUM_CLASSES]))
out = tf.add(tf.matmul(x, weights), biases)
return out
sess = tf.Session()
# get filelist and labels for training and testing
data_train,data_test,label_train,label_test = load_data(Datainfo,ClassInfo,folder)
data_train, data_test, = reshapedata(data_train, data_test)
############################ get files for training ####################################################
image_batch, label_batch = batchdata(data_train,label_train,BATCH_SIZE)
# input output placeholders
x = tf.placeholder(tf.float32, [None, IMAGE_PIXELS])
y_ = tf.placeholder(tf.float32,[None, NUM_CLASSES])
# create the network
y = conv_net( x )
# loss
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y, y_))
# train step
train_step = tf.train.AdamOptimizer( 1e-3 ).minimize( cost )
############################## get files for validataion ###################################################
image_batch_test, label_batch_test = batchdata(data_test,label_test,BATCH_SIZE)
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
sess.run(tf.initialize_all_variables())
################ CNN Program ##############################
for i in range(STEPS):
# checking the accuracy in between.
if i % STEP_VALIDATE == 0:
imgs, lbls = sess.run([image_batch_test, label_batch_test])
print(sess.run(accuracy, feed_dict={x: imgs, y_: lbls}))
imgs, lbls = sess.run([image_batch, label_batch])
sess.run(train_step, feed_dict={x: imgs, y_: lbls})
imgs, lbls = sess.run([image_batch_test, label_batch_test])
print(sess.run(accuracy, feed_dict={ x: imgs, y_: lbls}))
file can be downloaded dataset_300.txt and ClassInfo.txt
Session.run accepts only a list of tensors or tensor names.
imgs, lbls = sess.run([image_batch_test, label_batch_test])
In the previous line, you are passing image_batch_test and label_batch_test which are numpy arrays. I am not sure what you are trying to do by imgs, lbls = sess.run([image_batch_test, label_batch_test])

How to use RNN tensorflow to learning one-Dimensional Data? AttributeError: 'numpy.ndarray' object has no attribute 'batch'

The one-D data concludes 80 samples, with everyone is 1089 length. I want to use 70 samples to training and 10 samples to testing.
I am totally beginner in python and tensorflow, so I use the code which is processing image(which is two-dimension). Here is the code I use(all the parameters are pretty low for I just want to test the code):
import tensorflow as tf
import scipy.io as sc
from tensorflow.python.ops import rnn, rnn_cell
# data read
feature_training = sc.loadmat("feature_training.mat")
feature_training = feature_training['feature_training']
print (feature_training.shape)
feature_testing = sc.loadmat("feature_testing.mat")
feature_testing = feature_testing['feature_testing']
print (feature_testing.shape)
label_training = sc.loadmat("label_training.mat")
label_training = label_training['label_training']
print (label_training.shape)
label_testing = sc.loadmat("label_testing.mat")
label_testing = label_testing['label_testing']
print (label_testing.shape)
# parameters
learning_rate = 0.1
training_iters = 100
batch_size = 70
display_step = 10
# network parameters
n_input = 70 # MNIST data input (img shape: 28*28)
n_steps = 100 # timesteps
n_hidden = 10 # hidden layer num of features
n_classes = 2 # MNIST total classes (0-9 digits)
# tf Graph input
x = tf.placeholder("float", [None, n_steps, n_input])
y = tf.placeholder("float", [None, n_classes])
# Define weights
weights = {
'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
}
biases = {
'out': tf.Variable(tf.random_normal([n_classes]))
}
def RNN(x, weights, biases):
# Prepare data shape to match `rgnn` function requirements
# Current data input shape: (batch_size, n_steps, n_input)
# Required shape: 'n_steps' tensors list of shape (batch_size, n_input)
# Permuting batch_size and n_steps
x = tf.transpose(x, [1, 0, 2])
# Reshaping to (n_steps*batch_size, n_input)
x = tf.reshape(x, [-1, n_input])
# Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
x = tf.split(0, n_steps, x)
# Define a lstm cell with tensorflow
lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
# Get lstm cell output
outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
# Linear activation, using rnn inner loop last output
return tf.matmul(outputs[-1], weights['out']) + biases['out']
pred = RNN(x, weights, biases)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
step = 1
# Keep training until reach max iterations
while step * batch_size < training_iters:
batch_x, batch_y = feature_training.next_batch(batch_size)
# Reshape data to get 28 seq of 28 elements
batch_x = batch_x.reshape((batch_size, n_steps, n_input))
# Run optimization op (backprop)
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
if step % display_step == 0:
# Calculate batch accuracy
acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y})
# Calculate batch loss
# loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
print ("Iter " + str(step*batch_size) + ", Training Accuracy= " +
"{:.5f}".format(acc))
step += 1
print ("Optimization Finished!")
# Calculate accuracy for 10 testing data
test_len = 10
test_data = feature_testing[:test_len].reshape((-1, n_steps, n_input))
test_label = label_testing[:test_len]
print ("Testing Accuracy:",
sess.run(accuracy, feed_dict={x: test_data, y: test_label}))
At last, it turns out the Error:
Traceback (most recent call last):
File "/home/xiangzhang/MNIST data test.py", line 92, in <module>
batch_x, batch_y = feature_training.batch(batch_size)
AttributeError: 'numpy.ndarray' object has no attribute 'next_batch'
I thought it must be related with the dimension of the data, but I do not know how to fix it. Please help me, thanks very much.

How to write denoising autoencoder as RNN with tensorflow

I want to adapt this Recurrent Neural Network in Tensorflow (from this tutorial
https://github.com/aymericdamien/TensorFlow-Examples/
and then the RNN program)
), so that it will be a denoising autoencoder.
I have 5 time steps, and at each time, the noiseless target is sampled from sin(x), and the noisy input is sin(x)+ Gaussian error.
Now my problem is that the RNN from the example gives me 1 output value for each sequence of inputs, but I want an output for each time step ( I want 5 outputs, not 1)
How do I do this? I suspect it may be a matter of redefining the weights and biases, but how?
Here is the code. Many thanks for your help,
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
import numpy as np
# Parameters
learning_rate = 0.0005
training_iters = 1000
batch_size = 3
display_step = 100
# Network Parameters
n_input = 2
n_output = 2
n_steps = 5 # timesteps
n_hidden = 40 # hidden layer num of features
# tf Graph input
x = tf.placeholder("float", [None, n_steps, n_input])
y = tf.placeholder("float", [None, n_steps, n_input])
# Define weights
weights = {
'out': tf.Variable(tf.random_normal([n_hidden, n_output]))
}
biases = {
'out': tf.Variable(tf.random_normal([ n_output]))
}
# length of time series to be sampled
N = 1000000
dim_input = 2
x1 = np.zeros(N)
x2 = np.zeros(N)
y1 = np.zeros(N)
y2 = np.zeros(N)
# generate data
for i in range(0,N):
# clean
y1[i] = np.math.sin(i)
y2[i] = np.math.cos(i)
# noisy
x1[i] = y1[i]+np.random.normal(loc=0.0, scale=0.05)
x2[i] = y2[i]+np.random.normal(loc=0.0, scale=0.05)
def next_batch():
batch = np.empty([batch_size,n_steps,dim_input])
batch_y = np.empty([batch_size,n_steps,dim_input])
# for plotting purposes only
inits = np.empty([batch_size], dtype=int)
for b in range(0,batch_size):
# the first one of the batch
inits[b] = int(np.round(np.random.uniform(low=0,high=N-n_steps- 1)))
init = inits[b]
for i in range(0,n_steps):
# noisy input
batch[b,i,0] = x1[init + i]
batch[b,i,1] = x2[init + i]
# target (no noise)"
batch_y[b,i,0] = y1[init+i]
batch_y[b,i,1] = y2[init+i]
return(batch,batch_y,inits)
def RNN(x, weights, biases):
x = tf.transpose(x, [1, 0, 2])
# Reshaping to (n_steps*batch_size, n_input)
x = tf.reshape(x, [-1, n_input])
# Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
x = tf.split(0, n_steps, x)
# Define a lstm cell with tensorflow
lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
# Get lstm cell output
outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
# Linear activation, using rnn inner loop last output
return tf.matmul(outputs[-1], weights['out']) + biases['out']
pred = RNN(x, weights, biases)
# Define loss and optimizer
# SSE, there must be an easier way to do this
def get_cost(prediction,truth):
z = 0
for i in range(0,batch_size):
z = z + np.square(np.add(prediction[i,:], np.multiply(-1,truth[i,:])))
z = np.add(z[0],z[1])
z = np.sum(z)
return(z)
cost = get_cost(pred,y)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).
minimize(cost)
# Evaluate model
accuracy = cost
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
step = 1
# Keep training until reach max iterations
while step * batch_size < training_iters:
print('step '+ str(step))
batch_x, batch_y, inits = next_batch()
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
if step % display_step == 0:
# Calculate batch accuracy
acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y})
# Calculate batch loss
loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
print(loss)
step += 1
print("Optimization Finished!")
If I run this, I get this error message:
ValueError: Shape (?, 5, 2) must have rank 2. This seems fair enough, because the target is 5 steps long, and the output only 1. But how do I fix that?
Many thanks.
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
import numpy as np
import matplotlib.pyplot as plt
## Denoising autoencoder.
import numpy as np
count = 0
# length of time series to be sampled
N = 10000
x1 = np.zeros(N)
x2 = np.zeros(N)
y1 = np.zeros(N)
y2 = np.zeros(N)
batch_size = 30
learning_rate = 0.0005
training_iters = 300000
display_step = 100
# Network Parameters
n_input = 2
n_output = 2
n_steps = 15 # timesteps
n_hidden = 75 # hidden layer num of
# generate data
for i in range(0,N):
# clean
y1[i] = np.math.sin(i)
y2[i] = np.math.cos(i)
# noisy
x1[i] = y1[i]+np.random.normal(loc=0.0, scale=0.1)
x2[i] = y2[i]+np.random.normal(loc=0.0, scale=0.1)
def next_batch():
batch = np.empty([batch_size,n_steps,n_input])
batch_y = np.empty([batch_size,n_steps,n_input])
# for plotting purposes only
inits = np.empty([batch_size], dtype=int)
for b in range(0,batch_size):
# the first one of the batch
inits[b] = int(np.round(np.random.uniform(low=0,high=N-n_steps-1)))
init = inits[b]
for i in range(0,n_steps):
# noisy input
batch[b,i,0] = x1[init + i]
batch[b,i,1] = x2[init + i]
# target (no noise)"
batch_y[b,i,0] = y1[init+i]
batch_y[b,i,1] = y2[init+i]
return(batch,batch_y,inits)
# Parameters
# tf Graph input
x = tf.placeholder("float", [None, n_steps, n_input])
y = tf.placeholder("float", [None, n_steps, n_output])
N_train = N - 500
def RNN(x):
# Prepare data shape to match `rnn` function requirements
# Current data input shape: (batch_size, n_steps, n_input)
# Required shape: 'n_steps' tensors list of shape (batch_size, n_input)
# Permuting batch_size and n_steps
x = tf.transpose(x, [1, 0, 2])
# Reshaping to (n_steps*batch_size, n_input)
x = tf.reshape(x, [-1, n_input])
# Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
x = tf.split(0, n_steps, x)
# Define a lstm cell with tensorflow
lstm_cell = rnn_cell.LSTMCell(num_units = n_hidden, forget_bias=1.0, num_proj=2)
# Get lstm cell output
outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
return outputs
print(x)
pred = RNN(x)
# Define loss and optimizer
def get_cost(prediction,truth):
#print('pred' + str(prediction))
# SSE. there must be an easier way than this:
z = 0
for step in range(0,n_steps):
for b in range(0,batch_size):
for y_dim in range(0,2):
d1 = prediction[step][b,y_dim]
d2 = truth[b,step,y_dim]
diff= (d1 - d2 )
z = z + diff * diff
return(z)
cost = get_cost(pred,y)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
step = 1
# Keep training until reach max iterations
while step * batch_size < training_iters:
#print('step '+ str(step))
batch_x, batch_y, inits = next_batch()
# Reshape data to get 28 seq of 28 elements
#batch_x = batch_x.reshape((batch_size, n_steps, n_input))
# Run optimization op (backprop)
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
if step % display_step == 0:
# Calculate batch loss
loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
print(str(step) + ':' + str(loss))
step += 1
print("Optimization Finished!")
batch_size = 1
test_data, test_label, inits = next_batch()
#print "Testing Accuracy:", \
#sess.run(accuracy, feed_dict={x: test_data, y: test_label})
p2 = sess.run(pred, feed_dict={x: test_data, y: test_label})
#print('---inits---')
#print(inits)
print('---batch---')
print(test_data)
print('---truth---')
print(test_label)
print('---pred---')
print(p2)
c_final = get_cost(p2, test_label)
print(c_final)
First, we generate some data: a 2-dimensional series of sin(i) and cos(i), with i running from 1 to N. This gives us the variable y. Then we add some Normal noise to this series, and that's x. Then, we train a Recurrent Neural Net to create the clean output from the noisy input. In other words, we train the net such that it will output [cos(i),sin(i)] from input [cos(i)+e1,sin(i)+e2) ]. This is a plain vanilla denoising autoencoder, except that the data has a time element. Now you can feed new data into the neural net, and it will hopefully remove the noise.

Requesting multiple values from graph at same time

In the code below l2 surprisingly returns the same value as l1, but since the optimizer is being requested in the list before l2, I expected the loss to be the new loss after training. Can I not request multiple values at the same time from the graph and expect consistent output?
import tensorflow as tf
import numpy as np
x = tf.placeholder(tf.float32, shape=[None, 10])
y = tf.placeholder(tf.float32, shape=[None, 2])
weight = tf.Variable(tf.random_uniform((10, 2), dtype=tf.float32))
loss = tf.nn.sigmoid_cross_entropy_with_logits(tf.matmul(x, weight), y)
optimizer = tf.train.AdamOptimizer(0.1).minimize(loss)
with tf.Session() as sess:
tf.initialize_all_variables().run()
X = np.random.rand(1, 10)
Y = np.array([[0, 1]])
# Evaluate loss before running training step
l1 = sess.run([loss], feed_dict={x: X, y: Y})[0][0][0]
print(l1) # 3.32393
# Running the training step
_, l2 = sess.run([optimizer, loss], feed_dict={x: X, y: Y})
print(l2[0][0]) # 3.32393 -- didn't change?
# Evaluate loss again after training step as sanity check
l3 = sess.run([loss], feed_dict={x: X, y: Y})[0][0][0]
print(l3) # 2.71041
No - the order in which you request them in the list has no effect on the evaluation order. For side-effect-having operations such as the optimizer, if you want to guarantee a specific ordering, you need to enforce it using with_dependencies or similar control-flow constructs. In general, ignoring side-effects, TensorFlow will return results to you by grabbing the node from the graph as soon as it's computed - and, obviously, the loss is computed before the optimizer, since the optimizer requires the loss as one of its input. (Remember that 'loss' is not a variable; it's a tensor; so it's not actually affected by the optimizer step.)
sess.run([loss, optimizer], ...)
and
sess.run([optimizer, loss], ...)
are equivalent.
As Dave points out, the order of arguments to Session.run() has no effect on the order of evaluation, and the loss tensor in your example does not have a dependency on the optimizer op. To add a dependency, you could use tf.control_dependencies() to add an explicit dependency on the optimizer running before fetching the loss:
with tf.control_dependencies([optimizer]):
loss_after_optimizer = tf.identity(loss)
_, l2 = sess.run([optimizer, loss_after_optimizer], feed_dict={x: X, y: Y})
I've tested logistic regression implemented in tensorflow with three ways of session.run:
all together
res1, res2, res3 = sess.run([op1, op2, op3])
separately
res1 = sess.run(op1)
res2 = sess.run(op2)
res3 = sess.run(op3)
with dependencies
with tf.control_dependencies([op1]):
op2_after = tf.identity(op1)
op3_after = tf.identity(op1)
res1,res2,res3 = session.run([op1, op2_after, op3_after])
set batch size as 10000, the result is:
1: 0.05+ secs < 2: 0.11+ secs < 3: 0.25+ secs
The main difference between 1 and 3 is only one mini-batch. It may not worth it to use 3 instead of 1.
Here is the test code (it is an LR example written by someone else...).
Here is the data
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Fri Jun 2 13:38:14 2017
#author: inse7en
"""
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
import time
pickle_file = '/Users/inse7en/Downloads/notMNIST.pickle'
with open(pickle_file, 'rb') as f:
save = pickle.load(f)
train_dataset = save['train_dataset']
train_labels = save['train_labels']
valid_dataset = save['valid_dataset']
valid_labels = save['valid_labels']
test_dataset = save['test_dataset']
test_labels = save['test_labels']
del save # hint to help gc free up memory
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
image_size = 28
num_labels = 10
def reformat(dataset, labels):
dataset = dataset.reshape((-1, image_size * image_size)).astype(np.float32)
# Map 2 to [0.0, 1.0, 0.0 ...], 3 to [0.0, 0.0, 1.0 ...]
labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
return dataset, labels
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
# This is to expedite the process
train_subset = 10000
# This is a good beta value to start with
beta = 0.01
graph = tf.Graph()
with graph.as_default():
# Input data.
# They're all constants.
tf_train_dataset = tf.constant(train_dataset[:train_subset, :])
tf_train_labels = tf.constant(train_labels[:train_subset])
tf_valid_dataset = tf.constant(valid_dataset)
tf_test_dataset = tf.constant(test_dataset)
# Variables
# They are variables we want to update and optimize.
weights = tf.Variable(tf.truncated_normal([image_size * image_size, num_labels]))
biases = tf.Variable(tf.zeros([num_labels]))
# Training computation.
logits = tf.matmul(tf_train_dataset, weights) + biases
# Original loss function
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))
# Loss function using L2 Regularization
regularizer = tf.nn.l2_loss(weights)
loss = tf.reduce_mean(loss + beta * regularizer)
# Optimizer.
optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
# Predictions for the training, validation, and test data.
train_prediction = tf.nn.softmax(logits)
valid_prediction = tf.nn.softmax(tf.matmul(tf_valid_dataset, weights) + biases)
test_prediction = tf.nn.softmax(tf.matmul(tf_test_dataset, weights) + biases)
num_steps = 50
def accuracy(predictions, labels):
return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
/ predictions.shape[0])
with tf.Session(graph=graph) as session:
# This is a one-time operation which ensures the parameters get initialized as
# we described in the graph: random weights for the matrix, zeros for the
# biases.
tf.initialize_all_variables().run()
print('Initialized')
for step in range(num_steps):
# Run the computations. We tell .run() that we want to run the optimizer,
# and get the loss value and the training predictions returned as numpy
# arrays.
#_, l, predictions = session.run([optimizer, loss, train_prediction])
start_time = time.time()
with tf.control_dependencies([optimizer]):
loss_after_optimizer = tf.identity(loss)
predictions_after = tf.identity(train_prediction)
regularizers_after = tf.identity(regularizer)
_, l, predictions,regularizers = session.run([optimizer, loss_after_optimizer, predictions_after, regularizers_after])
print("--- with dependencies: %s seconds ---" % (time.time() - start_time))
#start_time = time.time()
#opt = session.run(optimizer)
#l = session.run(loss)
#predictions = session.run(train_prediction)
#regularizers = session.run(regularizer)
#print("--- run separately: %s seconds ---" % (time.time() - start_time))
#start_time = time.time()
#_, l, predictions,regularizers = session.run([optimizer, loss, train_prediction, regularizer])
#print("--- all together: %s seconds ---" % (time.time() - start_time))
#if (step % 100 == 0):
#print('Loss at step {}: {}'.format(step, l))
#print('Training accuracy: {:.1f}'.format(accuracy(predictions,
#train_labels[:train_subset, :])))
# Calling .eval() on valid_prediction is basically like calling run(), but
# just to get that one numpy array. Note that it recomputes all its graph
# dependencies.
# You don't have to do .eval above because we already ran the session for the
# train_prediction
#print('Validation accuracy: {:.1f}'.format(accuracy(valid_prediction.eval(),
#valid_labels)))
#print('Test accuracy: {:.1f}'.format(accuracy(test_prediction.eval(), test_labels)))
#print(regularizer)