Related
So i was using tflearn to make a CNN and the accuracy was nice, but i tried to train the same kind of network with the same learning rates and other parameters. But for some reason i don't understand the accuracy i got when using tensorflow was lower. Is there any reason that this happened?
Here is my Neural net layer:
def cnn(x):
x = tflearn.layers.core.input_data(shape=[None, 50, 50, 3], placeholder=x)
conv_layer1 = tflearn.layers.conv.conv_2d(x, nb_filter=32, filter_size=5, activation='relu')
out_layer_1 = tflearn.layers.max_pool_2d(conv_layer1, 5)
conv_layer2 = tflearn.layers.conv.conv_2d(out_layer_1, nb_filter=64, filter_size=5, activation='relu')
out_layer_2 = tflearn.layers.max_pool_2d(conv_layer2, 5)
conv_layer3 = tflearn.layers.conv.conv_2d(out_layer_2, nb_filter=128, filter_size=5, activation='relu')
out_layer_3 = tflearn.layers.max_pool_2d(conv_layer3, 5)
conv_layer4 = tflearn.layers.conv.conv_2d(out_layer_3, nb_filter=64, filter_size=5, activation='relu')
out_layer_4 = tflearn.layers.max_pool_2d(conv_layer4, 5)
conv_layer5 = tflearn.layers.conv.conv_2d(out_layer_4, nb_filter=32, filter_size=5, activation='relu')
out_layer_5 = tflearn.layers.max_pool_2d(conv_layer5, 5)
fc1 = tflearn.layers.core.fully_connected(out_layer_5, 1024, activation='relu', name="FC1")
fc1_dropout = tflearn.layers.core.dropout(fc1, 0.5)
output = tflearn.layers.core.fully_connected(fc1_dropout, 2, activation='softmax', name='output')
return output
And here is my training function:
def train_model():
x = tf.placeholder(tf.float32, shape=[None, 50, 50, 3], name="x")
x_image = tf.reshape(x, [-1, 50, 50, 3])
y = tf.placeholder(tf.float32, shape=[None, 2], name="y")
y_cls = tf.argmax(y, dimension=1)
y_pred = cnn(x_image)
print "Importing Training Data..."
x_train = np.load('data/CatOrDog/training_images.npy')
y_train = np.load('data/CatOrDog/training_labels.npy')
y_train = [[1, 0] if label == 'Dog' else [0, 1] for label in y_train]
y_train = np.array(y_train)
randomer = np.arange(x_train.shape[0])
np.random.shuffle(randomer)
x_train = x_train[randomer]
y_train = y_train[randomer]
n_data = len(x_train)
x_train = np.array(x_train, dtype='float32')
print "Images Shape: ", x_train.shape, "\t", x_train.dtype
print "\nImporting Testing Data..."
x_test = np.load('data/CatOrDog/testing_images.npy')
y_test = np.load('data/CatOrDog/testing_labels.npy')
y_test = [[1, 0] if testing_label == 'Dog' else [0, 1] for testing_label in y_test]
y_test = np.array(y_test)
x_test = np.array(x_test, dtype='float32')
randomer = np.arange(x_test.shape[0])
np.random.shuffle(randomer)
x_test = x_test[randomer]
y_test = y_test[randomer]
n_data = len(x_train)
n_test_data =len(x_test)
'''divider = int(n_test_data / 2)
x_test_data = x_test[0:divider]
y_test_data = y_test[0:divider]
x_validation_data = x_test[divider+1:n_test_data-1]
y_validation_data = y_test[divider + 1:n_test_data - 1]'''
with tf.variable_scope("Softmax"):
y_pred_cls = tf.argmax(y_pred, dimension=1)
with tf.name_scope("cross_ent"):
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=y_pred, labels=y)
cost = tf.reduce_mean(cross_entropy)
with tf.name_scope("Optimizer"):
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
with tf.name_scope("Accuracy"):
correct_prediction = tf.equal(y_pred_cls, y_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
writer = tf.summary.FileWriter("Training_FileWriter/")
writer1 = tf.summary.FileWriter("Validation_FileWriter/")
tf.summary.scalar('loss', cost)
tf.summary.scalar('accuracy', accuracy)
merged_summary = tf.summary.merge_all()
num_epochs = 10
batch_size = 300
with tf.Session() as sess:
# x = sess.graph.get_tensor_by_name('x')
sess.run(tf.global_variables_initializer())
writer.add_graph(sess.graph)
for epoch in range(num_epochs):
start_time = time.time()
train_accuracy = 0
cur_batch = int(n_data / batch_size)
prev_index = 0
bar = progressbar.ProgressBar(maxval=cur_batch)
bar.start()
for batch in range(0, cur_batch):
start, end = ClassifyData.get_batch_array_indexes(previous_index=prev_index, batch_size=batch_size, n_data=n_data)
if start == n_data:
break
x_batch = x_train[start:end]
y_true_batch = y_train[start:end]
feed_dict_train = {x: x_batch, y: y_true_batch}
sess.run(optimizer, feed_dict=feed_dict_train)
train_accuracy += sess.run(accuracy, feed_dict=feed_dict_train)
summ = sess.run(merged_summary, feed_dict=feed_dict_train)
writer.add_summary(summ, epoch * int(n_data / batch_size) + batch)
bar.update(batch)
bar.finish()
train_accuracy /= int(n_data / batch_size)
summ, vali_accuracy = sess.run([merged_summary, accuracy],
feed_dict={x: x_test, y: y_test})
writer1.add_summary(summ, epoch)
end_time = time.time()
print "\nEpoch " + str(epoch + 1) + " completed : Time usage " + str(
int(end_time - start_time)) + " seconds"
print "\tAccuracy:"
print "\t- Training Accuracy:\t{}".format(train_accuracy)
print "\t- Validation Accuracy:\t{}".format(vali_accuracy)
PS I am using tflearn to build my network.
My tensorflow version is 1.3.0 .
My python version is 3.5.
I implement CNN followed by bid-LSTM. and I run code on CPU.
After implementing CNN, pool2's shape will be [batch_size(None), None, 106, 64]. Then tf.reshape(pool2, [-1, tf.shape(pool2)[1], tf.shape(pool2)[2]xtf.shape(pool2)[3]]) . I hope tf.reshape can reshape 4D into 3D on pool2. And then feed it bid-LSTM, but tf.nn.bidirectional_dynamic_rnn happen wrong.
It says "Input size (depth of inputs) must be accessible via shape inference," ValueError: Input size (depth of inputs) must be accessible via shape inference, but saw value None.
I haven't found the solution to the problem for a long time. Maybe I use wrong keyword to search on Internet. Or give some right keyword to me.
x = tf.placeholder('float', shape=[None, None, 108])
y = tf.placeholder('float')
n_steps = tf.placeholder('int64')
def CNN(x):
input_layer = tf.reshape(x, [-1, tf.shape(x)[1], 108, 1])
conv1 = tf.layers.conv2d(inputs=input_layer, filters=32, kernel_size=[5, 3], padding="same", activation=tf.nn.relu)
pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=1)
conv2 = tf.layers.conv2d(inputs=conv1, filters=64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu)
pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=1)
output = tf.reshape(pool2, [-1, tf.shape(pool2)[1], tf.shape(pool2)[2]*tf.shape(pool2)[3]])
return output
def recurrent_neural_network(x):
layer1 = {'weights':tf.Variable(tf.random_normal([rnn_size*2,n_classes])),'biases':tf.Variable(tf.random_normal([n_classes]))}
lstm_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(rnn_size,state_is_tuple=True)
lstm_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(rnn_size,state_is_tuple=True)
outputs, states = tf.nn.bidirectional_dynamic_rnn(cell_fw=lstm_fw_cell, cell_bw=lstm_bw_cell, inputs=x, dtype=tf.float32) #[batch_size, max_time, cell_output_size]
outputs = tf.concat(outputs, 2)
max_length = tf.shape(outputs)[1]
outputs = tf.reshape(outputs, [-1, rnn_size*2])
prediction = tf.matmul(outputs,layer1['weights']) + layer1['biases']
prediction = tf.reshape(prediction, [-1, max_length, n_classes])
return prediction
def train_neural_network(x):
CNN_result = CNN(x)
prediction = recurrent_neural_network(CNN_result)
tf.nn.softmax_cross_entropy_with_logits(logits=prediction,labels=y)
cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=prediction,labels=y) )
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for epoch in range(hm_epochs):
epoch_loss = 0
i=0
while i < len(train_X):
start = i
end = i+batch_size
batch_x = train_X[start:end]
batch_y = train_Y[start:end]
batch_sen_len = train_sen_len[start:end]
max_batch_sen_len = max(batch_sen_len)
#padding zero
for j in range(len(batch_x)):
k = max_batch_sen_len - len(batch_x[j])
for _ in range(k):
batch_x[j].append([0]*108)
batch_y[j].append([0]*48)
_, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y, n_steps: batch_sen_len})
epoch_loss += c
i+=batch_size
print('Epoch', epoch+1, 'completed out of',hm_epochs,'loss:',epoch_loss)
some value error in my code: ValueError: Variable d_conv2d1/weights/Adam/ does not exist, or was not created with tf.get_variable(). Did you mean to set reuse=None in VarScope?
My tensorflow version is 1.2. The data set is mnist .The code is as follow:
# -*- coding: utf-8 -*-
import os
import numpy as np
import scipy.misc
import tensorflow as tf
from tensorflow.contrib.layers.python.layers import batch_norm as batch_norm
from tensorflow.examples.tutorials.mnist import input_data
import string
BATCH_SIZE = 64
def read_data():
#
mnist = input_data.read_data_sets("MNIST_data/", one_hot = True)
(x_train, y_train), (x_test, y_test) = tf.contrib.keras.datasets.mnist.load_data()
x_train = x_train.reshape((60000,784))
x_test = x_test.reshape((10000,784))
y_train_vec = np.zeros((len(y_train), 10), dtype=np.float)
for i, label in enumerate(y_train):
y_train_vec[i, int(y_train[i])] = 1.0
y_test_vec = np.zeros((len(y_test), 10), dtype=np.float)
for i, label in enumerate(y_test):
y_test_vec[i, int(y_test[i])] = 1.0
#
#
X = np.concatenate((x_train, x_test), axis=0)
y = np.concatenate((y_train_vec, y_test_vec), axis=0)
#
seed = 547
np.random.seed(seed)
np.random.shuffle(X)
np.random.seed(seed)
np.random.shuffle(y)
return X/255., y
# 常数偏置
def bias(name, shape, bias_start = 0.0, trainable = True):
dtype = tf.float32
var = tf.get_variable(name, shape, tf.float32, trainable = trainable,
initializer = tf.constant_initializer(
bias_start, dtype = dtype))
return var
# 随机权重
def weight(name, shape, stddev = 0.02, trainable = True):
dtype = tf.float32
var = tf.get_variable(name, shape, tf.float32, trainable = trainable,
initializer = tf.random_normal_initializer(
stddev = stddev, dtype = dtype))
return var
# 全连接层
def fully_connected(value, output_shape, name = 'fully_connected', with_w = False):
shape = value.get_shape().as_list()
with tf.variable_scope(name):
weights = weight('weights', [shape[1], output_shape], 0.02)
biases = bias('biases', [output_shape], 0.0)
if with_w:
return tf.matmul(value, weights) + biases, weights, biases
else:
return tf.matmul(value, weights) + biases
# Leaky-ReLu 层
def lrelu(x, leak=0.2, name = 'lrelu'):
with tf.variable_scope(name):
return tf.maximum(x, leak*x, name = name)
# ReLu 层
def relu(value, name = 'relu'):
with tf.variable_scope(name):
return tf.nn.relu(value)
# 解卷积层
def deconv2d(value, output_shape, k_h = 5, k_w = 5, strides =[1, 2, 2, 1],
name = 'deconv2d', with_w = False):
with tf.variable_scope(name):
weights = weight('weights',
[k_h, k_w, output_shape[-1], value.get_shape()[-1]])
deconv = tf.nn.conv2d_transpose(value, weights,
output_shape, strides = strides)
biases = bias('biases', [output_shape[-1]])
deconv = tf.reshape(tf.nn.bias_add(deconv, biases), deconv.get_shape())
if with_w:
return deconv, weights, biases
else:
return deconv
# 卷积层
def conv2d(value, output_dim, k_h = 5, k_w = 5,
strides =[1, 2, 2, 1], name = 'conv2d'):
with tf.variable_scope(name):
weights = weight('weights',
[k_h, k_w, value.get_shape()[-1], output_dim])
conv = tf.nn.conv2d(value, weights, strides = strides, padding = 'SAME')
biases = bias('biases', [output_dim])
conv = tf.reshape(tf.nn.bias_add(conv, biases), conv.get_shape())
return conv
# 把约束条件串联到 feature map
def conv_cond_concat(value, cond, name = 'concat'):
# 把张量的维度形状转化成 Python 的 list
value_shapes = value.get_shape().as_list()
cond_shapes = cond.get_shape().as_list()
# 在第三个维度上(feature map 维度上)把条件和输入串联起来,
# 条件会被预先设为四维张量的形式,假设输入为 [64, 32, 32, 32] 维的张量,
# 条件为 [64, 32, 32, 10] 维的张量,那么输出就是一个 [64, 32, 32, 42] 维张量
with tf.variable_scope(name):
return tf.concat( [value, cond * tf.ones(value_shapes[0:3] + cond_shapes[3:])],3)
# Batch Normalization 层
def batch_norm_layer(value, is_train = True, name = 'batch_norm'):
with tf.variable_scope(name) as scope:
if is_train:
return batch_norm(value, decay = 0.9, epsilon = 1e-5, scale = True,
is_training = is_train,
updates_collections = None, scope = scope)
else:
return batch_norm(value, decay = 0.9, epsilon = 1e-5, scale = True,
is_training = is_train, reuse = True,
updates_collections = None, scope = scope)
# 保存图片函数
def save_images(images, size, path):
"""
Save the samples images
The best size number is
int(max(sqrt(image.shape[0]),sqrt(image.shape[1]))) + 1
example:
The batch_size is 64, then the size is recommended [8, 8]
The batch_size is 32, then the size is recommended [6, 6]
"""
# 图片归一化,主要用于生成器输出是 tanh 形式的归一化
img = (images + 1.0) / 2.0
h, w = img.shape[1], img.shape[2]
# 产生一个大画布,用来保存生成的 batch_size 个图像
merge_img = np.zeros((h * size[0], w * size[1], 3))
# 循环使得画布特定地方值为某一幅图像的值
for idx, image in enumerate(images):
i = idx % size[1]
j = idx // size[1]
merge_img[j*h:j*h+h, i*w:i*w+w, :] = image
# 保存画布
return scipy.misc.imsave(path, merge_img)
# 定义生成器
def generator(z, y, train = True):
# y 是一个 [BATCH_SIZE, 10] 维的向量,把 y 转成四维张量
yb = tf.reshape(y, [BATCH_SIZE, 1, 1, 10], name = 'yb')
# 把 y 作为约束条件和 z 拼接起来
z = tf.concat([z, y], 1, name = 'z_concat_y')
# 经过一个全连接,BN 和激活层 ReLu
h1 = tf.nn.relu(batch_norm_layer(fully_connected(z, 1024, 'g_fully_connected1'),
is_train = train, name = 'g_bn1'))
# 把约束条件和上一层拼接起来
h1 = tf.concat([h1, y], 1, name = 'active1_concat_y')
h2 = tf.nn.relu(batch_norm_layer(fully_connected(h1, 128 * 49, 'g_fully_connected2'),
is_train = train, name = 'g_bn2'))
h2 = tf.reshape(h2, [64, 7, 7, 128], name = 'h2_reshape')
# 把约束条件和上一层拼接起来
h2 = conv_cond_concat(h2, yb, name = 'active2_concat_y')
h3 = tf.nn.relu(batch_norm_layer(deconv2d(h2, [64,14,14,128],
name = 'g_deconv2d3'),
is_train = train, name = 'g_bn3'))
h3 = conv_cond_concat(h3, yb, name = 'active3_concat_y')
# 经过一个 sigmoid 函数把值归一化为 0~1 之间,
h4 = tf.nn.sigmoid(deconv2d(h3, [64, 28, 28, 1],
name = 'g_deconv2d4'), name = 'generate_image')
return h4
# 定义判别器
def discriminator(image, y, reuse = False):
# 因为真实数据和生成数据都要经过判别器,所以需要指定 reuse 是否可用
if reuse:
tf.get_variable_scope().reuse_variables()
# 同生成器一样,判别器也需要把约束条件串联进来
yb = tf.reshape(y, [BATCH_SIZE, 1, 1, 10], name = 'yb')
x = conv_cond_concat(image, yb, name = 'image_concat_y')
# 卷积,激活,串联条件。
h1 = lrelu(conv2d(x, 11, name = 'd_conv2d1'), name = 'lrelu1')
h1 = conv_cond_concat(h1, yb, name = 'h1_concat_yb')
h2 = lrelu(batch_norm_layer(conv2d(h1, 74, name = 'd_conv2d2'),
name = 'd_bn2'), name = 'lrelu2')
h2 = tf.reshape(h2, [BATCH_SIZE, -1], name = 'reshape_lrelu2_to_2d')
h2 = tf.concat( [h2, y],1, name = 'lrelu2_concat_y')
h3 = lrelu(batch_norm_layer(fully_connected(h2, 1024, name = 'd_fully_connected3'),
name = 'd_bn3'), name = 'lrelu3')
h3 = tf.concat([h3, y], 1,name = 'lrelu3_concat_y')
# 全连接层,输出以为 loss 值
h4 = fully_connected(h3, 1, name = 'd_result_withouts_sigmoid')
return tf.nn.sigmoid(h4, name = 'discriminator_result_with_sigmoid'), h4
# 定义训练过程中的采样函数
def sampler(z, y, train = True):
tf.get_variable_scope().reuse_variables()
return generator(z, y, train = train)
def train():
# 设置 global_step ,用来记录训练过程中的 step
global_step = tf.Variable(0, name = 'global_step', trainable = False)
# 训练过程中的日志保存文件
train_dir = '/home/Zhoupinyi/DCGAN/logs'
# 放置三个 placeholder,y 表示约束条件,images 表示送入判别器的图片,
# z 表示随机噪声
y= tf.placeholder(tf.float32, [BATCH_SIZE, 10], name='y')
images = tf.placeholder(tf.float32, [64, 28, 28, 1], name='real_images')
z = tf.placeholder(tf.float32, [None, 100], name='z')
# 由生成器生成图像 G
G = generator(z, y)
# 真实图像送入判别器
D, D_logits = discriminator(images, y)
# 采样器采样图像
samples = sampler(z, y)
# 生成图像送入判别器
D_, D_logits_ = discriminator(G, y, reuse = True)
# 损失计算
d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits( labels = tf.ones_like(D),logits = D_logits))
d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels = tf.zeros_like(D_), logits = D_logits_))
d_loss = d_loss_real + d_loss_fake
g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels = tf.ones_like(D_), logits = D_logits_))
# 总结操作
z_sum = tf.summary.histogram("z", z)
d_sum = tf.summary.histogram("d", D)
d__sum = tf.summary.histogram("d_", D_)
G_sum = tf.summary.image("G", G)
d_loss_real_sum = tf.summary.scalar("d_loss_real", d_loss_real)
d_loss_fake_sum = tf.summary.scalar("d_loss_fake", d_loss_fake)
d_loss_sum = tf.summary.scalar("d_loss", d_loss)
g_loss_sum = tf.summary.scalar("g_loss", g_loss)
# 合并各自的总结
g_sum = tf.summary.merge([z_sum, d__sum, G_sum, d_loss_fake_sum, g_loss_sum])
d_sum = tf.summary.merge([z_sum, d_sum, d_loss_real_sum, d_loss_sum])
# 生成器和判别器要更新的变量,用于 tf.train.Optimizer 的 var_list
t_vars = tf.trainable_variables()
d_vars = [var for var in t_vars if 'd_' in var.name]
g_vars = [var for var in t_vars if 'g_' in var.name]
saver = tf.train.Saver()
# 优化算法采用 Adam
optimizer = tf.train.AdamOptimizer(learning_rate = 0.0002, beta1 = 0.5)
d_optim = optimizer.minimize(d_loss, global_step = global_step, var_list = d_vars)
g_optim = optimizer.minimize(g_loss, global_step = global_step, var_list = g_vars)
os.environ['CUDA_VISIBLE_DEVICES'] = str(0)
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.2
sess = tf.InteractiveSession(config=config)
init = tf.initialize_all_variables()
writer = tf.train.SummaryWriter(train_dir, sess.graph)
# 这个自己理解吧
data_x, data_y = read_data()
sample_z = np.random.uniform(-1, 1, size=(BATCH_SIZE, 100))
# sample_images = data_x[0: 64]
sample_labels = data_y[0: 64]
sess.run(init)
# 循环 25 个 epoch 训练网络
for epoch in range(25):
batch_idxs = 1093
for idx in range(batch_idxs):
batch_images = data_x[idx*64: (idx+1)*64]
batch_labels = data_y[idx*64: (idx+1)*64]
batch_z = np.random.uniform(-1, 1, size=(BATCH_SIZE, 100))
# 更新 D 的参数
_, summary_str = sess.run([d_optim, d_sum],
feed_dict = {images: batch_images,
z: batch_z,
y: batch_labels})
writer.add_summary(summary_str, idx+1)
# 更新 G 的参数
_, summary_str = sess.run([g_optim, g_sum],
feed_dict = {z: batch_z,
y: batch_labels})
writer.add_summary(summary_str, idx+1)
# 更新两次 G 的参数确保网络的稳定
_, summary_str = sess.run([g_optim, g_sum],
feed_dict = {z: batch_z,
y: batch_labels})
writer.add_summary(summary_str, idx+1)
# 计算训练过程中的损失,打印出来
errD_fake = d_loss_fake.eval({z: batch_z, y: batch_labels})
errD_real = d_loss_real.eval({images: batch_images, y: batch_labels})
errG = g_loss.eval({z: batch_z, y: batch_labels})
if idx % 20 == 0:
print("Epoch: [%2d] [%4d/%4d] d_loss: %.8f, g_loss: %.8f" \
% (epoch, idx, batch_idxs, errD_fake+errD_real, errG))
# 训练过程中,用采样器采样,并且保存采样的图片到
# /home/your_name/TensorFlow/DCGAN/samples/
if idx % 100 == 1:
sample = sess.run(samples, feed_dict = {z: sample_z, y: sample_labels})
samples_path = '/home/your_name/TensorFlow/DCGAN/samples/'
save_images(sample, [8, 8],
samples_path + 'test_%d_epoch_%d.png' % (epoch, idx))
print 'save down'
# 每过 500 次迭代,保存一次模型
if idx % 500 == 2:
checkpoint_path = os.path.join(train_dir, 'DCGAN_model.ckpt')
saver.save(sess, checkpoint_path, global_step = idx+1)
sess.close()
if __name__ == '__main__':
train()
The error information is as follow:
Traceback (most recent call last):
File "/home/zhoupinbyi/DCGAN/test_gan.py", line 370, in <module>
train()
File "/home/zhoupinbyi/DCGAN/test_gan.py", line 297, in train
d_optim = optimizer.minimize(d_loss, global_step = global_step, var_list = d_vars)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 325, in minimize
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 446, in apply_gradients
self._create_slots([_get_variable_for(v) for v in var_list])
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/adam.py", line 128, in _create_slots
self._zeros_slot(v, "m", self._name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 766, in _zeros_slot
named_slots[_var_key(var)] = slot_creator.create_zeros_slot(var, op_name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/slot_creator.py", line 174, in create_zeros_slot
colocate_with_primary=colocate_with_primary)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/slot_creator.py", line 146, in create_slot_with_initializer
dtype)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/slot_creator.py", line 66, in _create_slot_var
validate_shape=validate_shape)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 1065, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 962, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 367, in get_variable
validate_shape=validate_shape, use_resource=use_resource)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 352, in _true_getter
use_resource=use_resource)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 682, in _get_single_variable
"VarScope?" % name)
ValueError: Variable d_conv2d1/weights/Adam/ does not exist, or was not created with tf.get_variable(). Did you mean to set reuse=None in VarScope?
the code can run in tensorflow 0.11, but in 1.2, you must add a code :
with tf.variable_scope(tf.get_variable_scope(), reuse=None):
above what you use your generator and discrimator function
I was trying to implement the new SELU activation function from https://arxiv.org/pdf/1706.02515. For more information here is my code:
import tensorflow as tf
import numpy as np
from PIL import Image
import os
from keras.activations import elu
batch_size = 32
def weight_variable(kernal_shape):
weights = tf.get_variable(name='weights', shape=kernal_shape, dtype=tf.float32, trainable=True,
initializer=tf.truncated_normal_initializer(stddev=0.02))
return weights
def bias_variable(shape):
initial = tf.constant(0.0, shape=shape)
return tf.Variable(initial)
def selu(x):
alpha = 1.6732632423543772848170429916717
scale = 1.0507009873554804934193349852946
#return scale * tf.where(x >= 0.0, x, alpha * tf.exp(x) - alpha)
return scale * elu(x, alpha)
def conv_layer(x, w_shape, b_shape, padding='SAME'):
W = weight_variable(w_shape)
tf.summary.histogram("weights", W)
b = bias_variable(b_shape)
tf.summary.histogram("biases", b)
# Note that I used a stride of 2 on purpose in order not to use max pool layer.
activations = selu(tf.nn.conv2d(x, W, strides=[1, 2, 2, 1], padding=padding) + b)
tf.summary.histogram(activations.name, activations)
W1 = tf.shape(x)[1]
W2 = tf.shape(activations)[1]
F = w_shape[0]
P = tf.divide(tf.add(tf.subtract(tf.multiply(tf.subtract(W2, 1), 2), W1), F), 2)
return activations, P
def deconv_layer(x, w_shape, b_shape, padding="SAME"):
W = weight_variable(w_shape)
tf.summary.histogram("weights", W)
b = bias_variable(b_shape)
tf.summary.histogram('biases', b)
x_shape = tf.shape(x)
out_shape = tf.stack([x_shape[0], x_shape[1] * 2, x_shape[2] * 2, w_shape[2]])
# Note that I have used a stride of 2 since I used a stride of 2 in conv layer.
transposed_activations = tf.nn.conv2d_transpose(x, W, out_shape, [1, 2, 2, 1], padding=padding) + b
tf.summary.histogram(transposed_activations.name, transposed_activations)
return transposed_activations
tfrecords_filename_seq = ["P16_db.tfrecords"]
filename_queue = tf.train.string_input_producer(tfrecords_filename_seq, num_epochs=None, shuffle=False, name='queue')
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(
serialized_example,
# Defaults are not specified since both keys are required.
features={
'height': tf.FixedLenFeature([], tf.int64),
'width': tf.FixedLenFeature([], tf.int64),
'image_raw': tf.FixedLenFeature([], tf.string),
'annotation_raw': tf.FixedLenFeature([], tf.string)
})
# This is how we create one example, that is, extract one example from the database.
image = tf.decode_raw(features['image_raw'], tf.uint8)
# The height and the weights are used to
height = tf.cast(features['height'], tf.int32)
width = tf.cast(features['width'], tf.int32)
# The image is reshaped since when stored as a binary format, it is flattened. Therefore, we need the
# height and the weight to restore the original image back.
image = tf.reshape(image, [height, width, 3])
image = tf.cast([image], tf.float32)
with tf.variable_scope('conv1'):
conv1, P1 = conv_layer(image, [3, 3, 3, 32], [32]) # image size: [56, 56]
with tf.variable_scope('conv2'):
conv2, P2 = conv_layer(conv1, [3, 3, 32, 64], [64]) # image size: [28, 28]
with tf.variable_scope('conv3'):
conv3, P3 = conv_layer(conv2, [3, 3, 64, 128], [128]) # image size: [14, 14]
with tf.variable_scope('conv4'):
conv4, P4 = conv_layer(conv3, [3, 3, 128, 256], [256]) # image size: [7, 7]
conv4_reshaped = tf.reshape(conv4, [-1, 7 * 7 * 256], name='conv4_reshaped')
w_c = tf.Variable(tf.truncated_normal([7 * 7 * 256, 100], stddev=0.1), name='weight_fc')
b_c = tf.Variable(tf.constant(0.1, shape=[100]), name='biases_fc')
tf.summary.histogram('weights_c', w_c)
tf.summary.histogram('biases_c', b_c)
with tf.variable_scope('z'):
z = selu(tf.nn.bias_add(tf.matmul(conv4_reshaped, w_c), b_c))
tf.summary.histogram('features_z', z)
w_dc = tf.Variable(tf.truncated_normal([100, 7 * 7 * 256], stddev=0.1), name='weights_dc')
b_dc = tf.Variable(tf.constant(0.1, shape=[7 * 7 * 256]), name='biases_dc')
tf.summary.histogram('weights_dc', w_dc)
tf.summary.histogram('biases_dc', b_dc)
with tf.variable_scope('deconv4'):
deconv4 = selu(tf.nn.bias_add(tf.matmul(z, w_dc), b_dc))
deconv4_reshaped = tf.reshape(deconv4, [-1, 7, 7, 256], name='deconv4_reshaped')
with tf.variable_scope('deconv3'):
deconv3 = deconv_layer(deconv4_reshaped, [3, 3, 128, 256], [128])
with tf.variable_scope('deconv2'):
deconv2 = deconv_layer(deconv3, [3, 3, 64, 128], [64])
with tf.variable_scope('deconv1'):
deconv1 = deconv_layer(deconv2, [3, 3, 32, 64], [32])
with tf.variable_scope('deconv_image'):
deconv_image = deconv_layer(deconv1, [3, 3, 3, 32], [3])
with tf.name_scope('loss'):
loss = tf.reduce_mean(tf.abs(deconv_image - image))
tf.summary.scalar('loss', loss)
with tf.name_scope('optimizer'):
optimizer = tf.train.AdamOptimizer(0.0001).minimize(loss)
init_op = tf.group(tf.local_variables_initializer(),
tf.global_variables_initializer())
saver = tf.train.Saver()
model_path = 'C:/Users/iayou005/Documents/tensorboard_logs/Graph_model/ckpt'
# Here is the session...
with tf.Session() as sess:
train_writer = tf.summary.FileWriter('C:/Users/iayou005/Documents/tensorboard_logs/New_Runs/DeconvNet', sess.graph)
merged = tf.summary.merge_all()
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
step = 0
sess.run(init_op)
# Note that the last name "Graph_model" is the name of the saved checkpoints file => the ckpt is saved
# under tensorboard_logs.
ckpt = tf.train.get_checkpoint_state(
os.path.dirname('C:/Users/iayou005/Documents/tensorboard_logs/Graph_model/ckpt'))
if ckpt and ckpt.model_checkpoint_path:
saver.restore(sess, ckpt.model_checkpoint_path)
print('checkpoints are saved!!!')
else:
print('No stored checkpoints')
while step < 100000:
if step % 1000 == 0:
img = sess.run([deconv_image])
img2 = Image.fromarray(np.uint8(img[0][0]))
img2.save('Reconstructed' + str(step) + '.png', 'png')
__, loss_s, summary = sess.run([optimizer, loss, merged])
if step % 100 == 0:
train_writer.add_summary(summary, step)
print(loss_s)
step += 1
save_path = saver.save(sess, model_path)
coord.request_stop()
coord.join(threads)
train_writer.close()
So without using Keras I kept getting a NAN:
InvalidArgumentError (see above for traceback): Nan in summary histogram for: conv1/weights_1
[[Node: conv1/weights_1 = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](conv1/weights_1/tag, conv1/weights/read/_61)]]
I would like to know the reason for getting a NAN.
Any help is much appreciated!!
I have a rather large network, and my GPU is running out of memory. It isn't a bug in any of the code, the network itself is simply too large to fit into memory. I've even tried the GPU config recommendations here.
For example, I've tried both of the gpu_options below...
gpu_options = tf.GPUOptions()
config = tf.ConfigProto(gpu_options=gpu_options)
config.gpu_options.allow_growth = True
# config.optimizer_options.opt_level = 2
# config.graph_options.enable_recv_scheduling = True
# config.graph_options.build_cost_model = 1
config.gpu_options.per_process_gpu_memory_fraction = 0.1
But I'm still running out of memory. I have been informed by GitHub user #girving here that Tensorflow doesn't handle Memory overflow (which makes no sense to me why they wouldn't implement this).
However, he also claimed there are workarounds. I can't find any support of anyone who has had to implement a workaround. Can anyone point me in the right direction? Can I implement Queuing somehow?
For reference, here is some code... the program runs out of memory at the time of sess(init)
#Kendall Weihe
#This is a CNN that handles 3D data
#Adjust network parameters below, also adjust data directory
import tensorflow as tf
import pdb
import numpy as np
from numpy import genfromtxt
from PIL import Image
from tensorflow.python.ops import rnn, rnn_cell
from tensorflow.contrib.grid_rnn.python.ops import grid_rnn_cell
from tensorflow.tensorflow.scroll import scroll_data
# Parameters
learning_rate = 0.001
training_iters = 1000000
batch_size = 1
display_step = 1
# Network Parameters
n_images = 100
n_input_x = 396 # Input image x-dimension
n_input_y = 396 # Input image y-dimension
n_input_z = 5
n_hidden = 128
n_classes = 2 # Binary classification -- on a surface or not
n_output = n_input_x * n_classes
dropout = 0.75 # Dropout, probability to keep units
# tf Graph input
x = tf.placeholder(tf.float32, [None, n_input_z, n_input_x, n_input_y])
y = tf.placeholder(tf.float32, [None, n_input_z, n_input_x, n_input_y, n_classes], name="ground_truth")
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)
def input_data():
data = np.empty((n_images, n_input_x, n_input_y))
temp = []
for i in range(n_images):
filename = "/home/volcart/Documents/Data/input_crops/cropped00" + str(i) + ".tif"
im = Image.open(path)
imarray = np.array(im)
temp.append(imarray)
for i in range(n_images):
for j in range(n_input_x):
for k in range(n_input_y):
data[i][j][k] = temp[i][j][k]
return data
# Create some wrappers for simplicity
def conv3d(x, W, b, strides=1):
# Conv2D wrapper, with bias and relu activation
x = tf.nn.conv3d(x, W, strides=[1, strides, strides, strides, 1], padding='SAME')
x = tf.nn.bias_add(x, b)
return tf.nn.relu(x)
def maxpool3d(x, k=2):
# MaxPool2D wrapper
return tf.nn.max_pool3d(x, ksize=[1, k, k, k, 1], strides=[1, k, k, k, 1],
padding='SAME')
def deconv3d(prev_layer, w, b, output_shape, strides):
# Deconv layer
deconv = tf.nn.conv3d_transpose(prev_layer, w, output_shape=output_shape, strides=strides, padding="VALID")
deconv = tf.nn.bias_add(deconv, b)
deconv = tf.nn.relu(deconv)
return deconv
# Create model
def conv_net(x, weights, biases, dropout):
# Reshape input picture
x = tf.reshape(x, shape=[-1, n_input_z, n_input_x, n_input_y, 1])
with tf.name_scope("conv1") as scope:
# Convolution Layer
conv1 = conv3d(x, weights['wc1'], biases['bc1'])
# Max Pooling (down-sampling)
#conv1 = tf.nn.local_response_normalization(conv1)
conv1 = maxpool3d(conv1, k=2)
# Convolution Layer
with tf.name_scope("conv2") as scope:
conv2 = conv3d(conv1, weights['wc2'], biases['bc2'])
# Max Pooling (down-sampling)
# conv2 = tf.nn.local_response_normalization(conv2)
conv2 = maxpool3d(conv2, k=2)
# Convolution Layer
with tf.name_scope("conv3") as scope:
conv3 = conv3d(conv2, weights['wc3'], biases['bc3'])
# Max Pooling (down-sampling)
# conv3 = tf.nn.local_response_normalization(conv3)
conv3 = maxpool3d(conv3, k=2)
# pdb.set_trace()
temp_batch_size = tf.shape(x)[0] #batch_size shape
with tf.name_scope("deconv1") as scope:
output_shape = [temp_batch_size, 2, n_input_x / 4, n_input_y / 4, 16]
strides = [1,2,2,2,1]
#conv4 = deconv3d(conv3, weights['wdc1'], biases['bdc1'], output_shape, strides)
# conv4 = tf.nn.local_response_normalization(conv4)
conv4 = tf.nn.conv3d_transpose(conv3, weights['wdc1'], output_shape=output_shape, strides=strides, padding="SAME")
conv4 = tf.nn.bias_add(conv4, biases['bdc1'])
conv4 = tf.nn.relu(conv4)
with tf.name_scope("deconv2") as scope:
output_shape = [temp_batch_size, 3, n_input_x / 2, n_input_y / 2, 8]
strides = [1,1,2,2,1]
conv5 = deconv3d(conv4, weights['wdc2'], biases['bdc2'], output_shape, strides)
# conv5 = tf.nn.local_response_normalization(conv5)
with tf.name_scope("deconv3") as scope:
output_shape = [temp_batch_size, n_input_z, n_input_x, n_input_y, 1]
#this time don't use ReLu -- since output layer
conv6 = tf.nn.conv3d_transpose(conv5, weights['wdc3'], output_shape=output_shape, strides=[1,1,2,2,1], padding="VALID")
conv6 = tf.nn.bias_add(conv6, biases['bdc3'])
conv6 = tf.nn.dropout(conv6, dropout)
# conv6 = tf.nn.relu(conv6)
# pdb.set_trace()
x = tf.reshape(conv6, [-1, n_input_x])
x = tf.split(0, n_input_y * n_input_z, x)
lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True, activation=tf.nn.relu)
# lstm_cell = rnn_cell.MultiRNNCell([lstm_cell] * n_hidden, state_is_tuple=True)
lstm_cell = rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=0.75)
outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
output = []
for i in xrange(n_input_y * n_input_z):
output.append(tf.matmul(outputs[i], lstm_weights[i]) + lstm_biases[i])
return output
weights = {
# 5x5 conv, 1 input, 32 outputs
'wc1' : tf.Variable(tf.random_normal([2, 2, 2, 1, 8])),
# 5x5 conv, 32 inputs, 64 outputs
'wc2' : tf.Variable(tf.random_normal([2, 2, 2, 8, 16])),
# 5x5 conv, 32 inputs, 64 outputs
'wc3' : tf.Variable(tf.random_normal([2, 2, 2, 16, 32])),
'wdc1' : tf.Variable(tf.random_normal([2, 2, 2, 16, 32])),
'wdc2' : tf.Variable(tf.random_normal([2, 2, 2, 8, 16])),
'wdc3' : tf.Variable(tf.random_normal([3, 2, 2, 1, 8])),
}
biases = {
'bc1': tf.Variable(tf.random_normal([8])),
'bc2': tf.Variable(tf.random_normal([16])),
'bc3': tf.Variable(tf.random_normal([32])),
'bdc1': tf.Variable(tf.random_normal([16])),
'bdc2': tf.Variable(tf.random_normal([8])),
'bdc3': tf.Variable(tf.random_normal([1])),
}
lstm_weights = {}
lstm_biases = {}
for i in xrange(n_input_y * n_input_z):
lstm_weights[i] = tf.Variable(tf.random_normal([n_hidden, n_output]))
lstm_biases[i] = tf.Variable(tf.random_normal([n_output]))
# Construct model
with tf.name_scope("net") as scope:
print "Building network..."
pred = conv_net(x, weights, biases, keep_prob)
print "Network built!"
# pdb.set_trace()
pred = tf.transpose(tf.pack(pred),[1,0,2])
pred = tf.reshape(pred, [-1, n_input_z, n_input_x, n_input_y, n_classes])
# Reshape for cost function
temp_pred = tf.reshape(pred, [-1, n_classes])
temp_y = tf.reshape(y, [-1, n_classes])
with tf.name_scope("loss") as scope:
# cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(pred, y))
cost = (tf.nn.sigmoid_cross_entropy_with_logits(temp_pred, temp_y))
with tf.name_scope("opt") as scope:
print "Initializing optimizer..."
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
print "optimizer initialized!"
# pdb.set_trace()
# Evaluate model
with tf.name_scope("acc") as scope:
# accuracy is the difference between prediction and ground truth matrices
correct_pred = tf.equal(0,tf.cast(tf.sub(tf.nn.sigmoid(temp_pred),temp_y), tf.int32))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
with tf.name_scope("prediction-node") as scope:
prediction_node = tf.nn.sigmoid(temp_pred)
# Initializing the variables
with tf.name_scope("initialize-and-config") as scope:
print "Initializing variables & configuring..."
init = tf.initialize_all_variables()
saver = tf.train.Saver()
gpu_options = tf.GPUOptions()
config = tf.ConfigProto(gpu_options=gpu_options)
config.gpu_options.allow_growth = True
# config.optimizer_options.opt_level = 2
# config.graph_options.enable_recv_scheduling = True
# config.graph_options.build_cost_model = 1
config.gpu_options.per_process_gpu_memory_fraction = 0.1
print "Variables and configurations initialized!"
# Launch the graph
with tf.Session(config=config) as sess:
print "Initializing session..."
sess.run(init)
print "Session initialized!"
print "Restoring session..."
saver.restore(sess, "/home/volcart/Documents/3D-CNN-2D-LSTM-reg-model/model.ckpt")
print "Session restored!"
tf.get_default_graph().finalize()
# Import data
print "Importing data..."
data = input_data()
print "Data imported!"
# Keep training until reach max iterations
for i in range(n_images):
print "Prediction image number -- " + str(i)
temp = []
for j in range(n_input_z):
temp.append(data[j,:,:])
temp = np.asarray(temp)
temp = temp.reshape((1, n_input_z, n_input_x, n_input_y))
prediction = sess.run(prediction_node, feed_dict={x: temp, keep_prob: 1.0})
prediction = prediction.reshape((n_input_x, n_input_y, n_classes))
temp_arr1 = np.empty((n_input_x, n_input_y))
for i in xrange(n_input_x):
for j in xrange(n_input_y):
if l == 0:
temp_arr1[i][j] = prediction[i][j][0]
csv_file = "/home/volcart/Documents/3D-CNN-2D-LSTM-pred/3D-CNN-2D-LSTM-step-" + str(i) + ".csv"
np.savetxt(csv_file, temp_arr1, delimiter=",")