TensorFlow equivalent of this code written in Keras

The Keras code runs perfectly, and the loss is close to zero. The input data is xData and the labels are yTrainData:
xData = np.reshape(xData, (-1, 1, sendLengthG * 4))
yTrainData = np.reshape(yTrainData, (-1, sendLengthG, sentComponentTypeCount))
model = k.models.Sequential()
model.add(k.layers.Dense(512, input_shape=(1, sendLengthG * 4), activation='tanh'))
model.add(k.layers.Dense(sendLengthG * sentComponentTypeCount, activation='linear'))
model.add(k.layers.Reshape([sendLengthG, sentComponentTypeCount]))
model.add(k.layers.Dense(sentComponentTypeCount, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='RMSProp', metrics=['accuracy'])
model.summary()
model.fit(xData, yTrainData, epochs=roundCount, batch_size=1, verbose=2)
I've written the TensorFlow code below, but I cannot get the loss below 0.011:
x = tf.placeholder(dtype=tf.float32)
yTrain = tf.placeholder(dtype=tf.float32)
x1 = tf.reshape(x, shape=[1, sendLengthG * 4])
nodeCount1 = 18
w1 = tf.Variable(tf.random_normal([sendLengthG * 4, nodeCount1], mean=0.5, stddev=0.1), dtype=tf.float32)
b1 = tf.Variable(tf.zeros([nodeCount1]), dtype=tf.float32)
n1 = tf.nn.tanh(tf.matmul(x1, w1) + b1)
nodeCount2 = 21
w2 = tf.Variable(tf.random_normal([nodeCount1, nodeCount2], mean=1.5, stddev=0.1), dtype=tf.float32)
b2 = tf.Variable(tf.zeros([nodeCount2]), dtype=tf.float32)
n2 = tf.nn.tanh(tf.matmul(n1, w2) + b2)
wn = tf.Variable(tf.random_normal([nodeCount2, sendLengthG * sentComponentTypeCount], mean=0.5, stddev=0.1), dtype=tf.float32)
bn = tf.Variable(tf.zeros([sendLengthG * sentComponentTypeCount]), dtype=tf.float32)
y = tf.matmul(n2, wn) + bn
yResult = tf.nn.softmax(tf.reshape(y, [sendLengthG, -1]))
loss = -tf.reduce_mean(yTrain * tf.log(tf.clip_by_value(yResult, 1e-10, 1.0)))
optimizer = tf.train.RMSPropOptimizer(learnRate)
train = optimizer.minimize(loss)
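For comparison, here is a minimal sketch (not the original code) of a graph that keeps the Keras layer width of 512 and maps each of the sendLengthG positions to sentComponentTypeCount classes, with the softmax folded into the cross-entropy op; it assumes sendLengthG, sentComponentTypeCount and learnRate are defined as above.
# Sketch only: mirrors the Keras layer sizes above; sendLengthG,
# sentComponentTypeCount and learnRate are assumed to be defined as in the question.
x = tf.placeholder(dtype=tf.float32, shape=[1, sendLengthG * 4])
yTrain = tf.placeholder(dtype=tf.float32, shape=[sendLengthG, sentComponentTypeCount])
w1 = tf.Variable(tf.random_normal([sendLengthG * 4, 512], stddev=0.1))
b1 = tf.Variable(tf.zeros([512]))
n1 = tf.nn.tanh(tf.matmul(x, w1) + b1)
w2 = tf.Variable(tf.random_normal([512, sendLengthG * sentComponentTypeCount], stddev=0.1))
b2 = tf.Variable(tf.zeros([sendLengthG * sentComponentTypeCount]))
logits = tf.reshape(tf.matmul(n1, w2) + b2, [sendLengthG, sentComponentTypeCount])
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=yTrain, logits=logits))
train = tf.train.RMSPropOptimizer(learnRate).minimize(loss)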

Related

Getting different Accuracy rates in convolutional neural networks while using tensorflow and tflearn

I was using tflearn to build a CNN and the accuracy was good, but then I tried to train the same kind of network with plain TensorFlow, using the same learning rate and other parameters. For some reason I don't understand, the accuracy I got with TensorFlow was lower. Is there any reason this happened?
Here is my network definition:
def cnn(x):
    x = tflearn.layers.core.input_data(shape=[None, 50, 50, 3], placeholder=x)
    conv_layer1 = tflearn.layers.conv.conv_2d(x, nb_filter=32, filter_size=5, activation='relu')
    out_layer_1 = tflearn.layers.max_pool_2d(conv_layer1, 5)
    conv_layer2 = tflearn.layers.conv.conv_2d(out_layer_1, nb_filter=64, filter_size=5, activation='relu')
    out_layer_2 = tflearn.layers.max_pool_2d(conv_layer2, 5)
    conv_layer3 = tflearn.layers.conv.conv_2d(out_layer_2, nb_filter=128, filter_size=5, activation='relu')
    out_layer_3 = tflearn.layers.max_pool_2d(conv_layer3, 5)
    conv_layer4 = tflearn.layers.conv.conv_2d(out_layer_3, nb_filter=64, filter_size=5, activation='relu')
    out_layer_4 = tflearn.layers.max_pool_2d(conv_layer4, 5)
    conv_layer5 = tflearn.layers.conv.conv_2d(out_layer_4, nb_filter=32, filter_size=5, activation='relu')
    out_layer_5 = tflearn.layers.max_pool_2d(conv_layer5, 5)
    fc1 = tflearn.layers.core.fully_connected(out_layer_5, 1024, activation='relu', name="FC1")
    fc1_dropout = tflearn.layers.core.dropout(fc1, 0.5)
    output = tflearn.layers.core.fully_connected(fc1_dropout, 2, activation='softmax', name='output')
    return output
And here is my training function:
def train_model():
    x = tf.placeholder(tf.float32, shape=[None, 50, 50, 3], name="x")
    x_image = tf.reshape(x, [-1, 50, 50, 3])
    y = tf.placeholder(tf.float32, shape=[None, 2], name="y")
    y_cls = tf.argmax(y, dimension=1)
    y_pred = cnn(x_image)
    print "Importing Training Data..."
    x_train = np.load('data/CatOrDog/training_images.npy')
    y_train = np.load('data/CatOrDog/training_labels.npy')
    y_train = [[1, 0] if label == 'Dog' else [0, 1] for label in y_train]
    y_train = np.array(y_train)
    randomer = np.arange(x_train.shape[0])
    np.random.shuffle(randomer)
    x_train = x_train[randomer]
    y_train = y_train[randomer]
    n_data = len(x_train)
    x_train = np.array(x_train, dtype='float32')
    print "Images Shape: ", x_train.shape, "\t", x_train.dtype
    print "\nImporting Testing Data..."
    x_test = np.load('data/CatOrDog/testing_images.npy')
    y_test = np.load('data/CatOrDog/testing_labels.npy')
    y_test = [[1, 0] if testing_label == 'Dog' else [0, 1] for testing_label in y_test]
    y_test = np.array(y_test)
    x_test = np.array(x_test, dtype='float32')
    randomer = np.arange(x_test.shape[0])
    np.random.shuffle(randomer)
    x_test = x_test[randomer]
    y_test = y_test[randomer]
    n_data = len(x_train)
    n_test_data = len(x_test)
    '''divider = int(n_test_data / 2)
    x_test_data = x_test[0:divider]
    y_test_data = y_test[0:divider]
    x_validation_data = x_test[divider+1:n_test_data-1]
    y_validation_data = y_test[divider + 1:n_test_data - 1]'''
    with tf.variable_scope("Softmax"):
        y_pred_cls = tf.argmax(y_pred, dimension=1)
    with tf.name_scope("cross_ent"):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=y_pred, labels=y)
        cost = tf.reduce_mean(cross_entropy)
    with tf.name_scope("Optimizer"):
        optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
    with tf.name_scope("Accuracy"):
        correct_prediction = tf.equal(y_pred_cls, y_cls)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    writer = tf.summary.FileWriter("Training_FileWriter/")
    writer1 = tf.summary.FileWriter("Validation_FileWriter/")
    tf.summary.scalar('loss', cost)
    tf.summary.scalar('accuracy', accuracy)
    merged_summary = tf.summary.merge_all()
    num_epochs = 10
    batch_size = 300
    with tf.Session() as sess:
        # x = sess.graph.get_tensor_by_name('x')
        sess.run(tf.global_variables_initializer())
        writer.add_graph(sess.graph)
        for epoch in range(num_epochs):
            start_time = time.time()
            train_accuracy = 0
            cur_batch = int(n_data / batch_size)
            prev_index = 0
            bar = progressbar.ProgressBar(maxval=cur_batch)
            bar.start()
            for batch in range(0, cur_batch):
                start, end = ClassifyData.get_batch_array_indexes(previous_index=prev_index, batch_size=batch_size, n_data=n_data)
                if start == n_data:
                    break
                x_batch = x_train[start:end]
                y_true_batch = y_train[start:end]
                feed_dict_train = {x: x_batch, y: y_true_batch}
                sess.run(optimizer, feed_dict=feed_dict_train)
                train_accuracy += sess.run(accuracy, feed_dict=feed_dict_train)
                summ = sess.run(merged_summary, feed_dict=feed_dict_train)
                writer.add_summary(summ, epoch * int(n_data / batch_size) + batch)
                bar.update(batch)
            bar.finish()
            train_accuracy /= int(n_data / batch_size)
            summ, vali_accuracy = sess.run([merged_summary, accuracy],
                                           feed_dict={x: x_test, y: y_test})
            writer1.add_summary(summ, epoch)
            end_time = time.time()
            print "\nEpoch " + str(epoch + 1) + " completed : Time usage " + str(int(end_time - start_time)) + " seconds"
            print "\tAccuracy:"
            print "\t- Training Accuracy:\t{}".format(train_accuracy)
            print "\t- Validation Accuracy:\t{}".format(vali_accuracy)
PS I am using tflearn to build my network.
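One difference between the two setups stands out: the tflearn output layer above uses activation='softmax', while the training function then feeds that output into tf.nn.softmax_cross_entropy_with_logits_v2, which applies its own softmax internally and expects raw logits. A small standalone check (hypothetical values, not the asker's data) showing that the loss differs when the softmax is applied twice:
import tensorflow as tf
# Hypothetical values; this only illustrates that softmax_cross_entropy_with_logits_v2
# gives a different loss when its input has already gone through a softmax.
logits = tf.constant([[2.0, -1.0]])
labels = tf.constant([[1.0, 0.0]])
loss_on_logits = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=labels)
loss_on_softmaxed = tf.nn.softmax_cross_entropy_with_logits_v2(logits=tf.nn.softmax(logits), labels=labels)
with tf.Session() as sess:
    print(sess.run([loss_on_logits, loss_on_softmaxed]))  # the two values differ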

TensorFlow: the output of AlexNet is all zero after training several steps, and sometimes the output is all the same in each dimension

Here is the output of AlexNet; I mean the output of the third fully-connected layer.
I don't know why the output is all zero. I also tried reducing the network by cutting the last two layers; then, after training several steps, the output is all the same in each dimension. Any ideas would be appreciated.
Here is the main inference code and the initial values:
import tensorflow as tf
import numpy as np
import os
import csv
import cifar10
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
IMAGE_SIZES = 32
IMAGE_CHANELS = 3
NUM_CLASSES = 10
FIRST_CONV_NUM = 64
SECOND_CONV_NUM = 192
THIRD_CONV_NUM = 384
FOURTH_CONV_NUM = 256
FIFTH_CONV_NUM = 256
MAX_POOL_SIZE = 3
BATCH_SIZE = 100
FIRST_FC_UNIT_NUM = 4096
SECOND_FC_UNIT_NUM = 1000
DROP_OUT_PRO = 0.5
THIRD_FC_UNIT_NUM = NUM_CLASSES
TRAIN_EPOCH = 10
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 10000
NUM_EPOCHS_PER_DECAY = 350.0
LEARNING_RATE_DECAY_FACTOR = 0.1
INITIAL_LEARNING_RATE = 0.01
DISPLAY_STEPS = 5
def leaky_relu(x, alpha=0.0):
    return tf.nn.relu(x) - alpha * tf.nn.relu(-x)
def activation(x, alpha=0.0):
    if alpha > 0:
        return leaky_relu(x, alpha)
    else:
        return tf.nn.relu(x)
def Alex_Weight(weight_name, weight_shape, weight_stddev, weight_type):
    Weight = tf.truncated_normal(shape=weight_shape, stddev=weight_stddev, dtype=weight_type)
    return tf.Variable(initial_value=Weight, trainable=True, name=weight_name)
def Alex_Bias(bias_name, bias_shape, bias_type, bias_init=0.1):
    initial = tf.constant(bias_init, shape=bias_shape)
    return tf.Variable(initial, trainable=True, dtype=bias_type, name=bias_name)
def Alex_AddActivationSummary(out):
    tf.summary.histogram('/activations', out)
    tf.summary.scalar('/sparsity', tf.nn.zero_fraction(out))
def Alex_Conv(conv_name, input, weight, bias, strides, alpha=0.1, padding="SAME", activation=activation, act_name="ReLU"):
    with tf.name_scope(conv_name):
        conv = tf.nn.conv2d(input, weight, [1, strides, strides, 1], padding)
        pre_activation = tf.nn.bias_add(conv, bias)
    with tf.name_scope(act_name):
        conv = activation(pre_activation, alpha=alpha)
    return conv
def Alex_Pool(conv, ksize, strides, pool_fuction=tf.nn.max_pool, padding="SAME"):
    return pool_fuction(conv, [1, ksize, ksize, 1], [1, strides, strides, 1], padding)
def Alex_Fully_Connect(input, weight, bias, activation=tf.nn.relu, act_name="ReLU"):
    with tf.name_scope("Wx_b"):
        y = tf.add(tf.matmul(input, weight), bias)
    with tf.name_scope(act_name):
        fc = activation(y, act_name)
    return fc
def Alex_Norm(norm_name, pool):
    with tf.name_scope(norm_name):
        norm = tf.nn.lrn(pool, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name=norm_name)
    return norm
def Alex_Inference(images):
    with tf.name_scope("First_Conv"):
        W1 = Alex_Weight("Fist_Conv_Weight", [5, 5, IMAGE_CHANELS, FIRST_CONV_NUM], weight_stddev=0.01,
                         weight_type=tf.float32)
        bias1 = Alex_Bias("First_Conv_Bias", [FIRST_CONV_NUM], tf.float32, bias_init=0.0)
        first_conv = Alex_Conv("First_Conv", images, W1, bias1, strides=1, padding="SAME")
        Alex_AddActivationSummary(first_conv)
    with tf.name_scope('lrn1') as scope:
        lrn1 = tf.nn.local_response_normalization(first_conv,
                                                  alpha=1e-4,
                                                  beta=0.75,
                                                  depth_radius=2,
                                                  bias=2.0)
    with tf.name_scope("First_Pool"):
        first_pool = Alex_Pool(lrn1, MAX_POOL_SIZE, strides=2, padding="VALID")
    with tf.name_scope("Second_Conv"):
        W2 = Alex_Weight("Second_Conv_Weight", [5, 5, FIRST_CONV_NUM, SECOND_CONV_NUM], weight_stddev=0.01,
                         weight_type=tf.float32)
        bias2 = Alex_Bias("Second_Conv_Bias", [SECOND_CONV_NUM], tf.float32, bias_init=1.0)
        second_conv = Alex_Conv("Second_Conv", first_pool, W2, bias2, strides=1, padding="SAME")
        Alex_AddActivationSummary(second_conv)
    with tf.name_scope('lrn2') as scope:
        lrn2 = tf.nn.local_response_normalization(second_conv,
                                                  alpha=1e-4,
                                                  beta=0.75,
                                                  depth_radius=2,
                                                  bias=2.0)
    with tf.name_scope("Second_Pool"):
        second_pool = Alex_Pool(lrn2, MAX_POOL_SIZE, strides=2, padding="VALID")
    with tf.name_scope("Third_Conv"):
        W3 = Alex_Weight("Third_Conv_Weight", [3, 3, SECOND_CONV_NUM, THIRD_CONV_NUM], weight_stddev=0.01,
                         weight_type=tf.float32)
        bias3 = Alex_Bias("Third_Conv_Bias", [THIRD_CONV_NUM], tf.float32, bias_init=0.0)
        third_conv = Alex_Conv("Third_Conv", second_pool, W3, bias3, strides=1, padding="SAME")
        Alex_AddActivationSummary(third_conv)
    with tf.name_scope("Fourth_Conv"):
        W4 = Alex_Weight("Fourth_Conv_Weight", [3, 3, THIRD_CONV_NUM, FOURTH_CONV_NUM], weight_stddev=0.01,
                         weight_type=tf.float32)
        bias4 = Alex_Bias("Fourth_Conv_Bias", [FOURTH_CONV_NUM], tf.float32, bias_init=1.0)
        fourth_conv = Alex_Conv("Fourth_Conv", third_conv, W4, bias4, strides=1, padding="SAME")
        Alex_AddActivationSummary(fourth_conv)
    with tf.name_scope("Fifth_Conv"):
        W5 = Alex_Weight("Fifth_Conv_Weight", [3, 3, FOURTH_CONV_NUM, FIFTH_CONV_NUM], weight_stddev=0.01,
                         weight_type=tf.float32)
        bias5 = Alex_Bias("Fifth_Conv_Bias", [FIFTH_CONV_NUM], tf.float32, bias_init=1.0)
        fifth_conv = Alex_Conv("Fifth_Conv", fourth_conv, W5, bias5, strides=1, padding="SAME")
        Alex_AddActivationSummary(fifth_conv)
    with tf.name_scope("Third_Pool"):
        third_pool = Alex_Pool(fifth_conv, MAX_POOL_SIZE, strides=2, padding="VALID")
    with tf.name_scope("Flatten"):
        flatten = tf.reshape(third_pool, [BATCH_SIZE, -1])
        flatten_dim = flatten.get_shape()[1].value
    with tf.name_scope("First_Fully_Connection"):
        W = Alex_Weight("Fist_FC_Weight", [flatten_dim, FIRST_FC_UNIT_NUM], weight_stddev=4e-2, weight_type=tf.float32)
        bias = Alex_Bias("First_FC_Bias", [FIRST_FC_UNIT_NUM], tf.float32, bias_init=1.0)
        fc1 = Alex_Fully_Connect(flatten, W, bias, activation=tf.nn.relu, act_name="ReLU")
        Alex_AddActivationSummary(fc1)
    with tf.name_scope("Drop_Out_1"):
        drop_out_1 = tf.nn.dropout(fc1, DROP_OUT_PRO)
    with tf.name_scope("Second_Fully_Connection"):
        W = Alex_Weight("Second_FC_Weight", [FIRST_FC_UNIT_NUM, SECOND_FC_UNIT_NUM], weight_stddev=4e-2,
                        weight_type=tf.float32)
        bias = Alex_Bias("Second_FC_Bias", [SECOND_FC_UNIT_NUM], tf.float32, bias_init=1.0)
        fc2 = Alex_Fully_Connect(drop_out_1, W, bias, activation=tf.nn.relu, act_name="ReLU")
        Alex_AddActivationSummary(fc2)
    with tf.name_scope("Drop_Out_2"):
        drop_out_2 = tf.nn.dropout(fc2, DROP_OUT_PRO)
    with tf.name_scope("Third_Fully_Connection"):
        W = Alex_Weight("Third_FC_Weight", [SECOND_FC_UNIT_NUM, THIRD_FC_UNIT_NUM], weight_stddev=1 / SECOND_FC_UNIT_NUM,
                        weight_type=tf.float32)
        bias = Alex_Bias("Third_FC_Bias", [THIRD_FC_UNIT_NUM], tf.float32, bias_init=1.0)
        fc3 = Alex_Fully_Connect(drop_out_2, W, bias, activation=tf.nn.relu, act_name="ReLU")
        Alex_AddActivationSummary(fc3)
    return fc3
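Note that Alex_Fully_Connect applies tf.nn.relu to fc3 as well, so the value returned from the third fully-connected layer is a ReLU output rather than raw logits; any example whose pre-activations are all negative comes out as an all-zero row. A small standalone illustration (hypothetical values, not the asker's data):
import tensorflow as tf
# Hypothetical values: a ReLU after the final layer zeroes every negative logit,
# so a row whose pre-activations are all negative becomes exactly [0, 0, ...].
pre_activation = tf.constant([[-0.3, -1.2, -0.5], [0.7, -0.4, 0.1]])
relu_output = tf.nn.relu(pre_activation)
with tf.Session() as sess:
    print(sess.run(relu_output))  # first row is [0. 0. 0.]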

tf.layers.batch_normalization freezes during sess.run() (1.5.0-dev20171031)

The graph-building phase passes without error, but the program freezes (no hard-drive reads, no memory change, nothing) during sess.run() on the first mini-batch of the first epoch. If I remove this layer or replace it with tf.contrib.layers.layer_norm, the program runs without issues.
The tensor x I pass into tf.layers.batch_normalization has shape [#batches, 200]. I use mostly default values but turned off center and scale.
x_BN = tf.layers.batch_normalization(
    x,
    axis=-1,
    momentum=0.99,
    epsilon=1e-10,  # 0.001,
    center=False,  # True,
    scale=False,  # True,
    beta_initializer=tf.zeros_initializer(),
    gamma_initializer=tf.ones_initializer(),
    moving_mean_initializer=tf.zeros_initializer(),
    moving_variance_initializer=tf.ones_initializer(),
    beta_regularizer=None,
    gamma_regularizer=None,
    beta_constraint=None,
    gamma_constraint=None,
    training=Flg_training,  # False,
    trainable=True,
    name=None,
    reuse=None,
    renorm=False,
    renorm_clipping=None,
    renorm_momentum=0.99,
    fused=False,
    virtual_batch_size=None,
    adjustment=None
)
The tensorflow version I'm using is tf-nightly-gpu (1.5.0-dev20171031 or 1.5.0-dev20171023). Has anyone encountered a similar problem?
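For reference, the tf.contrib.layers.layer_norm substitution mentioned above (which reportedly runs without issues) might look roughly like this; the center/scale flags mirror the batch_normalization call, and the exact call is a sketch rather than the code actually used:
import tensorflow as tf
# Sketch of the layer_norm substitution mentioned above (assumed parameters).
x = tf.placeholder(dtype=tf.float32, shape=[None, 200])
x_LN = tf.contrib.layers.layer_norm(x, center=False, scale=False)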
Update
This happens when the input of tf.layers.batch_normalization comes from tf.nn.bidirectional_dynamic_rnn; please see the simplified code below that reproduces the issue:
import tensorflow as tf
import numpy as np
starter_learning_rate = 0.001
decay_steps = 100
decay_rate = 0.96
num_RNN_layers = 3
LSTM_CELL_SIZE = 100
keep_prob = 0.95
with tf.name_scope('Inputs'):
    x = tf.placeholder(dtype=tf.float32, shape=[None, 200])
    y = tf.placeholder(dtype=tf.float32, shape=[None, 200])
    length = tf.placeholder(dtype=tf.int32, shape=[None])
    Flg_training = tf.placeholder(dtype=tf.bool, shape=[])
x_1 = tf.expand_dims(x, -1)
with tf.name_scope('BiLSTM'):
    dropcells = []
    for iiLyr in list(range(num_RNN_layers)):
        cell_iiLyr = tf.nn.rnn_cell.LSTMCell(num_units=LSTM_CELL_SIZE, state_is_tuple=True)
        dropcells.append(tf.nn.rnn_cell.DropoutWrapper(cell=cell_iiLyr, output_keep_prob=keep_prob))  # ,, input_keep_prob=self.keep_prob input_keep_prob=1.0, seed=None
    MultiLyr_cell = tf.nn.rnn_cell.MultiRNNCell(cells=dropcells, state_is_tuple=True)
    outputs, states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=MultiLyr_cell,
        cell_bw=MultiLyr_cell,
        dtype=tf.float32,
        sequence_length=length,  # tf_b_lens
        inputs=x_1,  # stacked_RefPts_desc, #tf_b_VCCs_AMs_BN1
        scope="BiLSTM"
    )
    # output_fw, output_bw = outputs
    states_fw, states_bw = states
    c_fw_lstLyr, h_fw_lstLyr = states_fw[-1]
    c_bw_lstLyr, h_bw_lstLyr = states_bw[-1]
    states_concat1 = tf.concat([h_fw_lstLyr, h_bw_lstLyr], axis=1, name='states_concat')
with tf.name_scope("cs_BN1"):
    x_BN = tf.layers.batch_normalization(
        states_concat1,
        axis=-1,  # the axis that should be normalized (typically the features axis, here the concatenated hidden states)
        momentum=0.99,
        epsilon=1e-10,  # 0.001,
        center=False,  # True,
        scale=False,  # True,
        beta_initializer=tf.zeros_initializer(),
        gamma_initializer=tf.ones_initializer(),
        moving_mean_initializer=tf.zeros_initializer(),
        moving_variance_initializer=tf.ones_initializer(),
        beta_regularizer=None,
        gamma_regularizer=None,
        beta_constraint=None,
        gamma_constraint=None,
        training=Flg_training,  # False,
        trainable=True,
        name="test_BN",  # None,
        reuse=None,
        renorm=False,
        renorm_clipping=None,
        renorm_momentum=0.99,
        fused=False,
        virtual_batch_size=None,
        adjustment=None
    )
with tf.name_scope("Regression"):
    a = tf.get_variable("a", shape=[1], dtype=tf.float32, initializer=tf.constant_initializer(1.0))
    b = tf.get_variable("b", shape=[1], dtype=tf.float32, initializer=tf.constant_initializer(0.0))
with tf.name_scope("Prediction"):
    y_pred = tf.multiply(x_BN, a) + b
with tf.name_scope('Loss'):
    losses = tf.losses.mean_squared_error(y, y_pred, reduction=tf.losses.Reduction.NONE)
    mean_loss = tf.reduce_mean(losses)
with tf.name_scope('Training'):
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                               decay_steps, decay_rate, staircase=True)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_step = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(losses, global_step=global_step)
# x_mean = tf.reduce_mean(x_BN, axis=0)
sess = tf.InteractiveSession()
train_writer = tf.summary.FileWriter("G:\\Surface_Ozone\\Temp\\", sess.graph)
sess.run(tf.global_variables_initializer())
for ii in list(range(2000)):
    x_in = (np.random.rand(20, 200))
    y_in = x_in * 1.5 + 3.0
    length_in = np.full([20], 200, dtype=np.int32)
    _, mean_loss_val, a_val, b_val = sess.run([train_step, mean_loss, a, b], feed_dict={
        x: x_in,
        Flg_training: True,
        y: y_in,
        length: length_in
    })
    if (ii < 50):
        print("step {}: {} | a: {} | b: {}".format(ii, mean_loss_val, a_val, b_val))
    else:
        if (ii % 100 == 0):
            print("step {}: {} | a: {} | b: {}".format(ii, mean_loss_val, a_val, b_val))
print("Normal End.")

TensorFlow: why use the forward-pass result y rather than the ExponentialMovingAverage forward-pass result average_y as the cross-entropy's parameter?

The code is below, using Python 3, Anaconda Spyder 3.6, and TensorFlow 1.0.0.
"""
Created on Sat Oct 14 11:00:54 2017
#author: Han.H
"""
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
INPUT_NODE = 784
OUTPUT_NODE = 10
LAYER1_NODE = 500
BATCH_SIZE = 100
LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99
REGULARAZTION_RATE = 0.0001 #lambda
TRAINING_STEPS = 20000
MOVING_AVERAGE_DECAY = 0.99
# when avg_class is None, do a normal forward pass without the ExponentialMovingAverage
def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    if avg_class is None:
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
        return tf.matmul(layer1, weights2) + biases2
    else:
        layer1 = tf.nn.relu(tf.matmul(input_tensor, avg_class.average(weights1)) + avg_class.average(biases1))
        return tf.matmul(layer1, avg_class.average(weights2)) + avg_class.average(biases2)
# build a 3-layer fully connected NN
def train(mnist):
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')
    weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))
    # normal fp
    y = inference(x, None, weights1, biases1, weights2, biases2)
    global_step = tf.Variable(0, trainable=False)
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    # L2
    regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)
    regularaztion = regularizer(weights1) + regularizer(weights2)
    loss = cross_entropy_mean + regularaztion
    # Set learning rate
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True)
    # Gradient descent
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')
    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
        test_feed = {x: mnist.test.images, y_: mnist.test.labels}
        for i in range(TRAINING_STEPS):
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("After %d training step(s), validation accuracy using average model is %g " % (i, validate_acc))
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: xs, y_: ys})
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print(("After %d training step(s), test accuracy using average model is %g" % (TRAINING_STEPS, test_acc)))
def main(argv=None):
    # Main programme here
    mnist = input_data.read_data_sets("F:/python/MNIST_data/", one_hot=True)
    train(mnist)
if __name__ == '__main__':
    main()
This code has no problem and runs well. I just want to know why average_y can't be used as the logits to calculate the cross entropy. I tried to do so, and it produced terrible results: the accuracy stayed around 0.009, as if the network were still randomly initialized.
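One way to see why the loss is built from y rather than average_y (a sketch of the reasoning, under the assumption that the shadow copies created by apply() are non-trainable): average() returns those shadow variables, so a loss built from average_y has no gradient path back to weights1/weights2, and the optimizer has essentially nothing useful to update.
import tensorflow as tf
# Minimal illustration (hypothetical values): a loss built from the EMA shadow
# variable has no gradient with respect to the underlying trainable variable.
w = tf.Variable([1.0], name='w')
ema = tf.train.ExponentialMovingAverage(0.99)
ema_op = ema.apply([w])
loss_from_w = tf.reduce_sum(w * 2.0)                  # depends on the trainable variable
loss_from_avg = tf.reduce_sum(ema.average(w) * 2.0)   # depends only on the shadow copy
print(tf.gradients(loss_from_w, [w]))    # a real gradient tensor
print(tf.gradients(loss_from_avg, [w]))  # [None] -- nothing for the optimizer to use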

ValueError: Cannot feed value of shape (3375, 50, 50, 2) for Tensor 'Reshape:0', which has shape '(?, 5000)'

I am learning TensorFlow. Below is my code for an MLP with TensorFlow. I have some issues with mismatched data dimensions.
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
wholedataset = np.load('C:/Users/pourya/Downloads/WholeTrueData.npz')
data = wholedataset['wholedata'].astype('float32')
label = wholedataset['wholelabel'].astype('float32')
height = wholedataset['wholeheight'].astype('float32')
print(type(data[20,1,1,0]))
learning_rate = 0.001
training_iters = 5
display_step = 20
n_input = 3375
X = tf.placeholder("float32")
Y = tf.placeholder("float32")
weights = {
'wc1': tf.Variable(tf.random_normal([3, 3, 2, 1])),
'wd1': tf.Variable(tf.random_normal([3, 3, 1, 1]))
}
biases = {
'bc1': tf.Variable(tf.random_normal([1])),
'out': tf.Variable(tf.random_normal([1,50,50,1]))
}
mnist= data
n_nodes_hl1 = 500
n_nodes_hl2 = 500
n_nodes_hl3 = 500
n_classes = 2
batch_size = 100
x = tf.placeholder('float', shape = [None,50,50,2])
shape = x.get_shape().as_list()
dim = np.prod(shape[1:])
x_reshaped = tf.reshape(x, [-1, dim])
y = tf.placeholder('float', shape= [None,50,50,2])
shape = y.get_shape().as_list()
dim = np.prod(shape[1:])
y_reshaped = tf.reshape(y, [-1, dim])
def neural_network_model(data):
    hidden_1_layer = {'weights': tf.Variable(tf.random_normal([5000, n_nodes_hl1])),
                      'biases': tf.Variable(tf.random_normal([n_nodes_hl1]))}
    hidden_2_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])),
                      'biases': tf.Variable(tf.random_normal([n_nodes_hl2]))}
    hidden_3_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl2, n_nodes_hl3])),
                      'biases': tf.Variable(tf.random_normal([n_nodes_hl3]))}
    output_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl3, n_classes])),
                    'biases': tf.Variable(tf.random_normal([n_classes]))}
    l1 = tf.add(tf.matmul(data, hidden_1_layer['weights']), hidden_1_layer['biases'])
    l1 = tf.nn.relu(l1)
    l2 = tf.add(tf.matmul(l1, hidden_2_layer['weights']), hidden_2_layer['biases'])
    l2 = tf.nn.relu(l2)
    l3 = tf.add(tf.matmul(l2, hidden_3_layer['weights']), hidden_3_layer['biases'])
    l3 = tf.nn.relu(l3)
    output = tf.matmul(l3, output_layer['weights']) + output_layer['biases']
    return output
def train_neural_network(x):
    prediction = neural_network_model(x)
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    hm_epochs = 10
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(hm_epochs):
            epoch_loss = 0
            for _ in range(int(n_input / batch_size)):
                epoch_x = wholedataset['wholedata'].astype('float32')
                epoch_y = wholedataset['wholedata'].astype('float32')
                _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
                epoch_loss += c
            print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:', epoch_loss)
        correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        print('Accuracy:', accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))
train_neural_network(x)
I got the following error:
ValueError: Cannot feed value of shape (3375, 50, 50, 2) for Tensor 'Reshape:0', which has shape '(?, 5000)'
Does anyone know what the issue with my code is, and how I can fix it?
The shape of the data is (3375, 50, 50, 2).
Thank you for anyone's input!
I think the problem is that you use the same variable name x for the placeholder and the reshape, in these lines:
x = tf.placeholder('float', shape = [None,50,50,2])
and
x = tf.reshape(x, [-1, dim])
so that when you
feed_dict={x: your_val}
you are feeding the output of the reshape operation.
You should use different names, for instance:
x_placeholder = tf.placeholder('float', shape=[None, 50, 50, 2])
x_reshaped = tf.reshape(x_placeholder, [-1, dim])
and then
feed_dict={x_placeholder: your_val}
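Putting that together, here is a minimal self-contained sketch (with hypothetical shapes matching the question) of feeding the placeholder rather than the reshaped tensor:
import numpy as np
import tensorflow as tf
# Sketch only: keep separate names for the placeholder and the reshaped tensor,
# and always feed the placeholder.
x_placeholder = tf.placeholder('float', shape=[None, 50, 50, 2])
dim = int(np.prod(x_placeholder.get_shape().as_list()[1:]))  # 50 * 50 * 2 = 5000
x_reshaped = tf.reshape(x_placeholder, [-1, dim])
with tf.Session() as sess:
    your_val = np.zeros((3375, 50, 50, 2), dtype=np.float32)
    print(sess.run(x_reshaped, feed_dict={x_placeholder: your_val}).shape)  # (3375, 5000)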