I am creating a deep, fully connected NN for MNIST input. I have a function (it takes a placeholder input):
# Create model
def multilayer_perceptron(x, activation_fn, weights, biases, dbg=False):
    layerDatas = OrderedDict()
    # build each hidden layer
    prev = x
    for i in range(len(weights)-1):
        weight = weights.items()[i][1]
        bias = biases.items()[i][1]
        var = 'layer_' + str(i+1)
        layerData = tf.add(tf.matmul(prev, weight), bias)
        layerData = activation_fn(layerData)
        prev = layerData
        layerDatas[var] = layerData
    # output layer with a linear activation, using the last layer's output
    out_layer = tf.matmul(prev, weights['out']) + biases['out']
    print x.eval()  # debug the data
    return out_layer
which takes multiple layers of weights and biases. I call it from the main program with:
sess = tf.InteractiveSession()  # start a session
print 'Data', n_input, n_classes
print 'Train', train_set_x.shape, train_set_y.shape
(weights, biases) = createWeightsBiases(layers, n_input, n_classes, dbg)
# tf Graph input
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None, n_classes])
# Construct model
pred = multilayer_perceptron(x, activation_fn, weights, biases, dbg)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Initializing the variables
init = tf.initialize_all_variables()
done_looping = False
display_step = 1
# Add ops to save and restore all the variables.
saver = tf.train.Saver()
# Launch the graph
sess.run(init)
# Training cycle
epochs = 1000
for epoch in range(epochs):
    avg_cost = 0.
    total_batch = int(len(train_set_x)/batch_size)
    print 'Batch', total_batch, batch_size
    # Loop over all batches
    for i in range(total_batch):
        batch_x = train_set_x[i * batch_size: (i + 1) * batch_size]
        batch_y = train_set_y[i * batch_size: (i + 1) * batch_size]
        # Run optimization op (backprop) and cost op (to get loss value)
        _, c = sess.run([optimizer, cost], feed_dict={x: batch_x,
                                                      y: batch_y})
        # Compute average loss
        avg_cost += c / total_batch
    # Display logs per epoch step
    if epoch % display_step == 0:
        print "Epoch:", '%04d' % (epoch+1), "cost=", \
            "{:.9f}".format(avg_cost)
print "Optimization Finished!"
# Test model
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print "Accuracy:", accuracy.eval({x: valid_set_x, y: valid_set_y})
When I try to print the tensor in my multilayer_perceptron function, I get a crash with
tensorflow.python.framework.errors.InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder' with dtype float
[[Node: Placeholder = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
I would appreciate help in getting around this.
You can't eval a placeholder. Instead, you can feed the graph with a proper value for the placeholder and only then extract the content (that's the value you fed the graph with).
So, remove the print x.eval() # debug the data line from the multilayer_perceptron function.
To inspect the value of a placeholder you have to feed it and then extract back the very value you just fed in (side note: this is rather pointless).
If you really want to do this, here's how:
placeholder_value = sess.run(x, feed_dict={x: [1,2,3,4]})
print placeholder_value
It will print the value [1,2,3,4]
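If the goal is to debug an intermediate value rather than the placeholder itself, a more useful pattern is to fetch the tensor you care about in the same sess.run call that feeds the placeholder. A minimal, self-contained sketch (the names here are illustrative, not from the code above):

import tensorflow as tf

x = tf.placeholder("float", [None, 4])
w = tf.Variable(tf.random_normal([4, 3]))
layer_1 = tf.nn.relu(tf.matmul(x, w))

sess = tf.InteractiveSession()
sess.run(tf.initialize_all_variables())
# fetch the intermediate tensor while feeding the placeholder it depends on
layer_value = sess.run(layer_1, feed_dict={x: [[1, 2, 3, 4]]})
print layer_value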
Related
It is a neural network with 12 fully connected layers and 2 CNN layers. I have a sample set to train it with, and I want to save the model during the training process. But when I try to use the saved model to predict on new samples, I find that it didn't save all the variables.
Here is part of my model-saving code:
W_hidden_4 = tf.Variable(weight_initializer([n_neurons_3, n_neurons_4]))
bias_hidden_4 = tf.Variable(bias_initializer([n_neurons_4]))
hidden_4 = tf.nn.relu(tf.add(tf.matmul(hidden_3, W_hidden_4), bias_hidden_4))
# Output layer: Variables for output weights and biases
W_out = tf.Variable(weight_initializer([n_neurons_6, n_rot]), name="W_out")
bias_out = tf.Variable(bias_initializer([n_rot]), name="bias_out")
out = tf.add(tf.matmul(hiddens['hidden_14'], W_out), bias_out, name="out")
# Cost function
# tf.reduce_mean: compute the average value
mse = tf.reduce_mean(tf.squared_difference(out, Y))
opt = tf.train.AdamOptimizer().minimize(mse)
# Run initializer
net.run(tf.global_variables_initializer())
for e in range(epochs):
    print(str(e) + ":")
    # Shuffle training data
    shuffle_indices = np.random.permutation(np.arange(len(y_train)))
    X_train = X_train[shuffle_indices]
    y_train = y_train[shuffle_indices]
    # Minibatch training
    for i in range(0, len(y_train) // batch_size):
        start = i * batch_size
        batch_x = X_train[start:start + batch_size]
        batch_y = y_train[start:start + batch_size]
        # Run optimizer with batch
        net.run(opt, feed_dict={X: batch_x, Y: batch_y})
        # Show progress
        if np.mod(i, 50) == 0:
            # Prediction
            pred = net.run(out, feed_dict={X: X_test})
            mse_final = net.run(mse, feed_dict={X: batch_x, Y: batch_y})
            print(mse_final)
    if e % 50 == 0:
        model_path = "/home/student/fulldata/src/fc_bigpara/model_" + str(e/50)
        save_path = saver.save(net, model_path)
And the following is my restore code:
X_test = np.loadtxt('test_x.txt')
sess = tf.Session()
# First let's load the meta graph and restore weights
saver = tf.train.import_meta_graph('/home/student/fulldata/src/old_model/fc_bigpara_14/model_19.0.meta')
#all_vars = tf.trainable_variables()
#for v in all_vars:
#    print(v.name)
#    print v.name, v.eval(self.sess)
saver.restore(sess,
              "/home/student/fulldata/src/old_model/fc_bigpara_14/model_19.0")
all_vars = tf.trainable_variables()
for v in all_vars:
    #print(v.name, v.eval(sess))
    print(v.name)
    print(v.shape)
# Now, let's access and create placeholder variables and
# create a feed-dict to feed new data
graph = tf.get_default_graph()
X = tf.placeholder(dtype=tf.float32, shape=[None, 3])
Y = tf.placeholder(dtype=tf.float32, shape=[None, 6])
out = graph.get_tensor_by_name("Variable_25:0")
#todo
with open("result.txt", "w") as f:
    #for i in range(0, len(X_test)):
    #    start = i * batch_size
    #    batch_x = X_test[start:start + batch_size]
    #    batch_x = X_test[i]
    feed_dict = {X: X_test}
    result = sess.run(out, feed_dict)
    #print(result.shape)
I can't find the parameter "out" among the model's variables, and adding name="out" didn't help, so I can't run the following line:
result = sess.run(out, feed_dict)
How can I modify my code to fix this bug?
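For what it's worth, out is the output of a tf.add op, not a tf.Variable, so it will never show up in tf.trainable_variables(). Since the op was created with name="out", one option is to fetch it from the restored graph by that name instead of creating fresh, unconnected placeholders. A minimal sketch, assuming the input placeholder was also given a name at save time (the "X:0" name below is an assumption):

import numpy as np
import tensorflow as tf

sess = tf.Session()
saver = tf.train.import_meta_graph('/home/student/fulldata/src/old_model/fc_bigpara_14/model_19.0.meta')
saver.restore(sess, "/home/student/fulldata/src/old_model/fc_bigpara_14/model_19.0")

graph = tf.get_default_graph()
# fetch the saved ops by the names they were given at save time
out = graph.get_tensor_by_name("out:0")
X = graph.get_tensor_by_name("X:0")  # assumes the placeholder was created with name="X"

X_test = np.loadtxt('test_x.txt')
result = sess.run(out, feed_dict={X: X_test})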
I have an error like this:
InvalidArgumentError (see above for traceback): logits and labels must be same size: logits_size=[10,9] labels_size=[7040,9]
[[Node: SoftmaxCrossEntropyWithLogits = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/gpu:0"](Reshape, Reshape_1)]]
But I can't find the tensor that causes this error... I think it comes from a size mismatch.
My input size is batch_size * n_steps * n_input, so it is 10*704*100. And I want the output to be
batch_size * n_steps * n_classes => 10*704*9, produced by a bidirectional RNN.
How should I change this code to fix the error?
batch_size means the number of data sequences, like this:
data 1 : ABCABCABCAAADDD...
...
data 10 : ABCCCCABCDBBAA...
n_steps means the length of each sequence (each one was padded with 'O' to a fixed length): 704
n_input means how each letter within a sequence is encoded, like this:
A - [1, 2, 1, -1, ..., -1]
And the output of the learning should be like this:
output of data 1 : XYZYXYZYYXY ...
...
output of data 10 : ZXYYRZYZZ ...
Each letter of the output is affected by the surrounding letters and the ordering of the input sequence.
learning_rate = 0.001
training_iters = 100000
batch_size = 10
display_step = 10
# Network Parameters
n_input = 100
n_steps = 704 # timesteps
n_hidden = 50 # hidden layer num of features
n_classes = 9
x = tf.placeholder("float", [None, n_steps, n_input])
y = tf.placeholder("float", [None, n_steps, n_classes])
weights = {
    'out': tf.Variable(tf.random_normal([2*n_hidden, n_classes]))
}
biases = {
    'out': tf.Variable(tf.random_normal([n_classes]))
}
def BiRNN(x, weights, biases):
    x = tf.unstack(tf.transpose(x, perm=[1, 0, 2]))
    # Forward direction cell
    lstm_fw_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Get lstm cell output
    try:
        outputs, _, _ = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                                     dtype=tf.float32)
    except Exception:  # Old TensorFlow versions only return outputs, not states
        outputs = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                               dtype=tf.float32)
    # Linear activation, using the rnn inner loop's last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
pred = BiRNN(x, weights, biases)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Initializing the variables
init = tf.global_variables_initializer()
# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    step = 1
    while step * batch_size < training_iters:
        batch_x, batch_y = next_batch(batch_size, r_big_d, y_r_big_d)
        #batch_x = batch_x.reshape((batch_size, n_steps, n_input))
        # Run optimization op (backprop)
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
        if step % display_step == 0:
            # Calculate batch accuracy
            acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y})
            # Calculate batch loss
            loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
            print("Iter " + str(step*batch_size) + ", Minibatch Loss= " +
                  "{:.6f}".format(loss) + ", Training Accuracy= " +
                  "{:.5f}".format(acc))
        step += 1
    print("Optimization Finished!")
    test_x, test_y = next_batch(batch_size, v_big_d, y_v_big_d)
    print("Testing Accuracy:",
          sess.run(accuracy, feed_dict={x: test_x, y: test_y}))
The first return value of static_bidirectional_rnn is a list of tensors, one per RNN step. By using only the last one in your tf.matmul you're losing all the rest. Instead, stack them into a single tensor of the appropriate shape, reshape for the matmul, then shape back.
outputs = tf.stack(outputs, axis=1)
# each bidirectional step output concatenates the forward and backward
# states, so its last dimension is 2*n_hidden, matching weights['out']
outputs = tf.reshape(outputs, (batch_size*n_steps, 2*n_hidden))
outputs = tf.matmul(outputs, weights['out']) + biases['out']
outputs = tf.reshape(outputs, (batch_size, n_steps, n_classes))
Alternatively, you could use tf.einsum:
outputs = tf.stack(outputs, axis=1)
outputs = tf.einsum('ijk,kl->ijl', outputs, weights['out']) + biases['out']
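Either way, pred now has shape (batch_size, n_steps, n_classes) to match y, so the loss and evaluation ops should work per timestep as well. A minimal sketch of the matching changes (softmax_cross_entropy_with_logits applies the softmax over the last axis, so rank-3 logits and labels of equal shape are fine; the argmax just moves to the class axis):

cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
# argmax over the class axis (axis 2), not axis 1
correct_pred = tf.equal(tf.argmax(pred, 2), tf.argmax(y, 2))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))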
I've been trying to solve a prediction/regression problem with TensorFlow, but I'm facing some problems. Let me give you some context before I explain my actual problem.
The data I've been playing with is a set of 5 features, let's call them [f1, f2, f3, f4, f5], representing in some way a particular phenomenon identified by a real value (the target).
What I've been trying to do is train a multilayer perceptron that learns the relationship between the features and the target values. In a nutshell, I'd like to predict real values based on what the neural network has seen during the training phase.
I've identified this as a prediction/regression problem and wrote the following code:
#picking device to run on
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
# Parameters
learning_rate = 0.001
training_epochs = 99999
batch_size = 4096
STDDEV = 0.1
# Network Parameters
n_hidden_1 = 10 # 1st layer number of neurons
n_hidden_2 = 10 # 2nd layer number of neurons
n_hidden_3 = 10 # 3rd layer number of neurons
n_hidden_4 = 10 # 4th layer number of neurons
n_hidden_5 = 10 # 5th layer number of neurons
n_input = 5 # number of features
n_classes = 1 # one target value (float)
# tf Graph input
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None, n_classes])
# LOADING DATA
data_train = loader.loadDataset(dir_feat, train_path, 'TRAIN', features)
data_test = loader.loadDataset(dir_feat, test_path, 'TEST', features)
valid_period = 5
test_period = 10
def multilayer_perceptron(x, weights, biases):
    # Hidden layers with sigmoid activation
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.sigmoid(layer_1)
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.sigmoid(layer_2)
    layer_3 = tf.add(tf.matmul(layer_2, weights['h3']), biases['b3'])
    layer_3 = tf.nn.sigmoid(layer_3)
    layer_4 = tf.add(tf.matmul(layer_3, weights['h4']), biases['b4'])
    layer_4 = tf.nn.sigmoid(layer_4)
    layer_5 = tf.add(tf.matmul(layer_4, weights['h5']), biases['b5'])
    layer_5 = tf.nn.sigmoid(layer_5)
    # Output layer with linear activation
    out = tf.matmul(layer_5, weights['out']) + biases['out']
    return out
# Store layers' weights & biases
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], stddev=STDDEV)),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], stddev=STDDEV)),
    'h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3], stddev=STDDEV)),
    'h4': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_4], stddev=STDDEV)),
    'h5': tf.Variable(tf.random_normal([n_hidden_4, n_hidden_5], stddev=STDDEV)),
    'out': tf.Variable(tf.random_normal([n_hidden_5, n_classes], stddev=STDDEV))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'b3': tf.Variable(tf.random_normal([n_hidden_3])),
    'b4': tf.Variable(tf.random_normal([n_hidden_4])),
    'b5': tf.Variable(tf.random_normal([n_hidden_5])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}
# Construct model
pred = multilayer_perceptron(x, weights, biases)
def RMSE():
    return tf.sqrt(tf.reduce_mean(tf.square(y - pred)))
cost = RMSE()
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    # Training cycle
    for epoch in range(1, training_epochs):
        avg_cost = 0.
        avg_R_square_train = []
        train_dataset = loader.Dataset(data=data_train, batch_size=batch_size, num_feats=n_input)
        total_batch = train_dataset.getNumberBatches()
        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = train_dataset.next_batch(update=True)
            # Run optimization op (backprop) and cost op (to get loss value)
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
            c_train = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
            # Compute average loss
            avg_cost += c_train / total_batch
        print("Epoch:" + str(epoch) + ", TRAIN_loss = {:.9f}".format(avg_cost))
        # TESTING
        if epoch % test_period == 0:
            c_test = sess.run(cost, feed_dict={x: data_test[0][0], y: data_test[0][1]})
            print("Epoch:" + str(epoch) + ", TEST_loss = {:.9f}".format(c_test))
The problem I've encountered is that the cost function on the test set gets stuck (after some iterations) at a local minimum and doesn't decrease anymore.
Epoch:6697, TRAIN_loss = 2.162182076
Epoch:6698, TRAIN_loss = 2.156500859
Epoch:6699, TRAIN_loss = 2.157814605
Epoch:6700, TRAIN_loss = 2.160744122
Epoch:6700, TEST_loss = 2.301288128
Epoch:6701, TRAIN_loss = 2.139338647
...
Epoch:6709, TRAIN_loss = 2.166410744
Epoch:6710, TRAIN_loss = 2.162357884
Epoch:6710, TEST_loss = 2.301478863
Epoch:6711, TRAIN_loss = 2.143475396
...
Epoch:6719, TRAIN_loss = 2.145476401
Epoch:6720, TRAIN_loss = 2.150237552
Epoch:6720, TEST_loss = 2.301517725
Epoch:6721, TRAIN_loss = 2.151232243
...
Epoch:6729, TRAIN_loss = 2.163080522
Epoch:6730, TRAIN_loss = 2.160523321
Epoch:6730, TEST_loss = 2.301782370
...
Epoch:6739, TRAIN_loss = 2.156920952
Epoch:6740, TRAIN_loss = 2.162290675
Epoch:6740, TEST_loss = 2.301943779
...
I've tried changing several hyper-parameters, such as the number of hidden layers and/or the number of nodes, the learning rate, the batch size, etc., but the situation doesn't change at all. I have also tried other loss functions such as MAE and MSE.
Actually, the number of data samples I have is roughly 270,000.
Can someone please suggest how to solve this problem, or give me some useful advice about it?
Thanks in advance.
Davide
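One concrete thing worth ruling out with a plateau like this is the scaling of the features and the target: stacked sigmoid layers saturate quickly on unscaled inputs, and gradient descent then barely moves. A minimal sketch of standardizing with training-set statistics (the X_train/X_test names are illustrative, not from the loader above):

import numpy as np

# compute statistics on the training split only, then apply them to both splits
mean = X_train.mean(axis=0)
std = X_train.std(axis=0) + 1e-8  # guard against zero variance

X_train = (X_train - mean) / std
X_test = (X_test - mean) / std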
The one-dimensional data consists of 80 samples, each of length 1089. I want to use 70 samples for training and 10 samples for testing.
I am a total beginner in Python and TensorFlow, so I reused code that processes images (which are two-dimensional). Here is the code I use (all the parameters are set quite low because I just want to test the code):
import tensorflow as tf
import scipy.io as sc
from tensorflow.python.ops import rnn, rnn_cell
# data read
feature_training = sc.loadmat("feature_training.mat")
feature_training = feature_training['feature_training']
print (feature_training.shape)
feature_testing = sc.loadmat("feature_testing.mat")
feature_testing = feature_testing['feature_testing']
print (feature_testing.shape)
label_training = sc.loadmat("label_training.mat")
label_training = label_training['label_training']
print (label_training.shape)
label_testing = sc.loadmat("label_testing.mat")
label_testing = label_testing['label_testing']
print (label_testing.shape)
# parameters
learning_rate = 0.1
training_iters = 100
batch_size = 70
display_step = 10
# network parameters
n_input = 70 # MNIST data input (img shape: 28*28)
n_steps = 100 # timesteps
n_hidden = 10 # hidden layer num of features
n_classes = 2 # MNIST total classes (0-9 digits)
# tf Graph input
x = tf.placeholder("float", [None, n_steps, n_input])
y = tf.placeholder("float", [None, n_classes])
# Define weights
weights = {
'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
}
biases = {
'out': tf.Variable(tf.random_normal([n_classes]))
}
def RNN(x, weights, biases):
    # Prepare data shape to match the `rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)
    # Permute batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    # Reshape to (n_steps*batch_size, n_input)
    x = tf.reshape(x, [-1, n_input])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    x = tf.split(0, n_steps, x)
    # Define an lstm cell with tensorflow
    lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Get lstm cell output
    outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
    # Linear activation, using the rnn inner loop's last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
pred = RNN(x, weights, biases)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    step = 1
    # Keep training until max iterations reached
    while step * batch_size < training_iters:
        batch_x, batch_y = feature_training.next_batch(batch_size)
        # Reshape data to get 28 sequences of 28 elements
        batch_x = batch_x.reshape((batch_size, n_steps, n_input))
        # Run optimization op (backprop)
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
        if step % display_step == 0:
            # Calculate batch accuracy
            acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y})
            # Calculate batch loss
            # loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
            print("Iter " + str(step*batch_size) + ", Training Accuracy= " +
                  "{:.5f}".format(acc))
        step += 1
    print("Optimization Finished!")
    # Calculate accuracy for the 10 testing samples
    test_len = 10
    test_data = feature_testing[:test_len].reshape((-1, n_steps, n_input))
    test_label = label_testing[:test_len]
    print("Testing Accuracy:",
          sess.run(accuracy, feed_dict={x: test_data, y: test_label}))
In the end, it raises this error:
Traceback (most recent call last):
  File "/home/xiangzhang/MNIST data test.py", line 92, in <module>
    batch_x, batch_y = feature_training.next_batch(batch_size)
AttributeError: 'numpy.ndarray' object has no attribute 'next_batch'
I thought it must be related to the dimensions of the data, but I do not know how to fix it. Please help me, thank you very much.
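The immediate cause is that feature_training is a plain NumPy array, and NumPy arrays have no next_batch method; that method belongs to the MNIST dataset wrapper used in the TensorFlow tutorials. A minimal sketch of a replacement helper that slices both arrays in step (the function name and wrap-around policy are illustrative):

import numpy as np

def next_batch(features, labels, step, batch_size):
    # slice out one minibatch, wrapping around at the end of the array
    start = (step * batch_size) % len(features)
    end = start + batch_size
    return features[start:end], labels[start:end]

Inside the training loop, batch_x, batch_y = next_batch(feature_training, label_training, step, batch_size) then replaces the failing call.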
I define initial states, but when I print them they are still zero!
Below is my code
def BiRNN(x, weights, biases):
    # some x shaping
    lstm_fw_cell = rnn_cell.GRUCell(n_hidden)
    lstm_bw_cell = rnn_cell.GRUCell(n_hidden)
    init_state_fw = lstm_fw_cell.zero_state(batch_size, tf.float32)
    init_state_bw = lstm_bw_cell.zero_state(batch_size, tf.float32)
    outputs, fstate, bstate = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                                    dtype=tf.float32,
                                                    initial_state_fw=init_state_fw,
                                                    initial_state_bw=init_state_bw)
    return [tf.matmul(outputs[-1], weights['out']) + biases['out'], init_state_fw]
pred = BiRNN(x, weights, biases)
cost = tf.reduce_mean(tf.pow(pred[0] - y, 2))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
init = tf.initialize_all_variables()
saver = tf.train.Saver()
# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    step = 1
    # Keep training until max iterations reached
    while step * batch_size < training_iters:
        # Start populating the filename queue.
        batch_x = example3[step % n_samples]
        batch_y = label3[step % n_samples]
        batch_x = batch_x.reshape((batch_size, n_steps, n_input))
        batch_y = batch_y.reshape((batch_size, n_steps, n_classes))
        # Run optimization op (backprop)
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
        w = sess.run(pred[1])
        print(w)
I want to keep the initial states for later use.
I have a data set that requires two different initial states, and I want the initial states to be trained separately.
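As a side note, zero_state just builds a constant zero tensor; it is not a variable, so nothing ever updates it, which is why printing pred[1] always shows zeros. If the initial states should be learned, a minimal sketch is to make them variables instead (shown for GRU cells, whose state is a single [batch_size, n_hidden] tensor; names follow the code above):

# trainable initial states: the optimizer can now update them via backprop
init_state_fw = tf.Variable(tf.zeros([batch_size, n_hidden]), name="init_state_fw")
init_state_bw = tf.Variable(tf.zeros([batch_size, n_hidden]), name="init_state_bw")

outputs, fstate, bstate = rnn.bidirectional_rnn(
    lstm_fw_cell, lstm_bw_cell, x,
    initial_state_fw=init_state_fw,
    initial_state_bw=init_state_bw,
    dtype=tf.float32)

Because the two states are separate variables, they are trained independently, and their learned values can be read back later with sess.run([init_state_fw, init_state_bw]).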