I am looking at the examples/image_ocr.py example in Keras. When I run it I see something like
_______________
max2 (MaxPooling2D) (None, 32, 16, 16) 0 conv2[0][0]
____________________________________________________________________________________________________
reshape (Reshape) (None, 32, 256) 0 max2[0][0]
____________________________________________________________________________________________________
dense1 (Dense) (None, 32, 32) 8224 reshape[0][0]
_____________________________________________________________________________________
The Dense layer outputs a 32x32 tensor per sample. I am trying to replicate this in pure TensorFlow, where tf.matmul would be used, but how can I output 32x32 using matmul?
Addition:
I am not trying to replicate the Keras example exactly; here is my attempt:
import numpy as np
import tensorflow as tf

w = 128; h = 64
# junk image, only one
dataset = np.zeros((1, w, h, 1))

pool_size = 1
num_filters = 16

def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')

inputs = tf.placeholder(tf.float32, [None, w, h, 1])

W_conv1 = weight_variable([3, 3, 1, num_filters])
b_conv1 = bias_variable([num_filters])
h_conv1 = tf.nn.relu(conv2d(inputs, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

W_conv2 = weight_variable([3, 3, num_filters, num_filters])
b_conv2 = bias_variable([num_filters])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

h_pool2_flat = tf.reshape(h_pool2, [-1, 32, 256])
W_fc1 = weight_variable([256, 32])
b_fc1 = bias_variable([32])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

print(inputs.shape)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    output = sess.run(h_pool2_flat, feed_dict={inputs: dataset})
    print('output', output.shape)
And I get
ValueError: Shape must be rank 2 but is rank 3 for 'MatMul_5' (op: 'MatMul') with input shapes: [?,32,256], [256,32].
A smaller example
import numpy as np
import tensorflow as tf

dataset = np.zeros((3, 2, 4))
inputs = tf.placeholder(tf.float32, [None, 2, 4])
print(inputs)
W = tf.zeros((4, 5))
print(W)
W2 = tf.matmul(inputs, W)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    output = sess.run(W2, feed_dict={inputs: dataset})
    print('output', output.shape)
This also gives a similar error:
ValueError: Shape must be rank 2 but is rank 3 for 'MatMul_12' (op: 'MatMul') with input shapes: [?,2,4], [4,5].
Any ideas?
Thanks,
That 32 is there because it was in the previous layer. It stays unchanged.
tf.matmul performs the multiplication over the last two dimensions, as stated here. (See the examples that take more than two dimensions.)
I see you've got a Dense(32) there, with input size = 256.
This means that the weights matrix is (256, 32). In Keras, the multiplication, as seen here, is inputs x kernel.
So, if you have the input tensor shaped as (?, any, 256), and the weights matrix shaped as (256,32), all you need is:
output = tf.matmul(input,weights)
This will output a shape of (?, any, 32); any is untouched because it was already there before.
You may also want to add the biases, which follow the same principle: you need a bias vector of shape (32,).
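For example, here is a minimal sketch in plain TF 1.x (an illustration, not taken from the question's code): if your version of tf.matmul rejects the rank-3 by rank-2 product, reshaping around the matmul gives the same (?, 32, 32) result as the Keras Dense layer.

import numpy as np
import tensorflow as tf

inputs = tf.placeholder(tf.float32, [None, 32, 256])
weights = tf.Variable(tf.truncated_normal([256, 32], stddev=0.1))
bias = tf.Variable(tf.constant(0.1, shape=[32]))

flat = tf.reshape(inputs, [-1, 256])      # (? * 32, 256)
dense = tf.matmul(flat, weights) + bias   # (? * 32, 32)
output = tf.reshape(dense, [-1, 32, 32])  # (?, 32, 32), like Dense(32) in Keras

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(output, feed_dict={inputs: np.zeros((1, 32, 256))}).shape)  # (1, 32, 32)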
Related
System information
- OS Platform and Distribution: CentOS Linux release 7.7.1908
- TensorFlow version: 2.3.0
I am trying to convert the TensorFlow official image caption model to a TFLite model.
I tried to convert the tf.keras.Model's encoder and decoder models as follows:
import tensorflow as tf
embedding_dim = 256
units = 512
top_k = 5000
vocab_size = top_k + 1
features_shape = 2048
attention_features_shape = 64
class BahdanauAttention(tf.keras.Model):
    def __init__(self, utils):
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(utils)
        self.W2 = tf.keras.layers.Dense(utils)
        self.V = tf.keras.layers.Dense(1)

    def call(self, features, hidden):
        # features (CNN_encoder output) shape == (batch_size, 64, embedding_dim)
        # hidden shape == (batch_size, hidden_size)
        # hidden_with_time_axis shape == (batch_size, 1, hidden_size)
        hidden_with_time_axis_shape = tf.expand_dims(hidden, 1)
        # score shape == (batch_size, 64, hidden_size)
        score = tf.nn.tanh(self.W1(features) + self.W2(hidden_with_time_axis_shape))
        # attention_weights shape == (batch_size, 64, 1)
        # you get 1 at the last axis because you are applying score to self.V
        attention_weights = tf.nn.softmax(self.V(score), axis=1)
        # context_vector shape after sum == (batch_size, hidden_size)
        context_vector = attention_weights * features
        context_vector = tf.reduce_sum(context_vector, axis=1)
        return context_vector, attention_weights

class CNN_Encoder(tf.keras.Model):
    # Since the features have already been extracted and dumped with pickle,
    # this encoder only passes them through a fully connected layer.
    def __init__(self, embedding):
        super(CNN_Encoder, self).__init__()
        # shape after fc == (batch_size, 64, embedding_dim)
        self.fc = tf.keras.layers.Dense(embedding_dim)

    # @tf.function(input_signature=[tf.TensorSpec(shape=(1, 64, features_shape), dtype=tf.float32)])
    @tf.function
    def call(self, x):
        x = self.fc(x)
        x = tf.nn.relu(x)
        return x
class RNN_Decoder(tf.keras.Model):
    def __init__(self, embedding_dim, units, vocab_size):
        super(RNN_Decoder, self).__init__()
        self.units = units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(self.units,
                                       return_sequences=True,
                                       return_state=True,
                                       recurrent_initializer='glorot_uniform',
                                       unroll=True)
        self.fc1 = tf.keras.layers.Dense(self.units)
        self.fc2 = tf.keras.layers.Dense(vocab_size)
        self.attention = BahdanauAttention(self.units)

    @tf.function(input_signature=[tf.TensorSpec(shape=[1, 1], dtype=tf.int32, name='x'),
                                  tf.TensorSpec(shape=[1, 64, 256], dtype=tf.float32, name='feature'),
                                  tf.TensorSpec(shape=[1, 512], dtype=tf.float32, name='hidden')])
    # @tf.function
    def call(self, x, features, hidden):
        # attention is defined as a separate model
        context_vector, attention_weights = self.attention(features, hidden)
        # x shape after passing through embedding == (batch_size, 1, embedding_dim)
        x = self.embedding(x)
        # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)
        # pass the concatenated vector to the GRU
        output, state = self.gru(x)
        # shape == (batch_size, max_length, hidden_size)
        x = self.fc1(output)
        # x shape == (batch_size, max_length, hidden_size)
        x = tf.reshape(x, (-1, x.shape[2]))
        # output shape == (batch_size * max_length, vocab)
        x = self.fc2(x)
        return x, state, attention_weights

    def reset_states(self, batch_size):
        return tf.zeros((batch_size, self.units))

encoder = CNN_Encoder(embedding_dim)
decoder = RNN_Decoder(embedding_dim, units, vocab_size)
encoder._set_inputs(tf.TensorSpec(shape=(1, 64, features_shape), dtype=tf.float32))
decoder._set_inputs([tf.TensorSpec(shape=[1, 1], dtype=tf.int32, name='x'),
                     tf.TensorSpec(shape=[1, 64, 256], dtype=tf.float32, name='feature'),
                     tf.TensorSpec(shape=[1, 512], dtype=tf.float32, name='hidden')])
encoder_converter = tf.lite.TFLiteConverter.from_keras_model(encoder)
decoder_converter = tf.lite.TFLiteConverter.from_keras_model(decoder)
encoder_model = encoder_converter.convert()
decoder_model = decoder_converter.convert()
open("encoder_model.tflite", "wb").write(encoder_model)
open("decoder_model.tflite", "wb").write(decoder_model)
The error message is:
ValueError: Structure of Python function inputs does not match input_signature:
inputs: (
[<tf.Tensor 'x:0' shape=(1, 1) dtype=int32>, <tf.Tensor 'feature:0' shape=(1, 64, 256) dtype=float32>, <tf.Tensor 'hidden:0' shape=(1, 512) dtype=float32>])
input_signature: (
TensorSpec(shape=(1, 1), dtype=tf.int32, name='x'),
TensorSpec(shape=(1, 64, 256), dtype=tf.float32, name='feature'),
TensorSpec(shape=(1, 512), dtype=tf.float32, name='hidden'))
I think the function input is the same as the input signature. How can I fix the problem?
I also had the same issue and found a simple solution: the call method of the tf.keras.Model class accepts only a single input, so you need to pack all your inputs inside a list/tuple.
https://github.com/tensorflow/tensorflow/issues/32488#issuecomment-560248754
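For reference, a rough sketch of that workaround applied to the decoder above (adapted here for illustration; the rest of the class is assumed unchanged, and exact behaviour may vary across TF 2.x versions). Declare a single input_signature entry that is a tuple of the three specs, and unpack it inside call:

    @tf.function(input_signature=[(tf.TensorSpec(shape=[1, 1], dtype=tf.int32, name='x'),
                                   tf.TensorSpec(shape=[1, 64, 256], dtype=tf.float32, name='feature'),
                                   tf.TensorSpec(shape=[1, 512], dtype=tf.float32, name='hidden'))])
    def call(self, inputs):
        x, features, hidden = inputs  # unpack the single tuple argument
        context_vector, attention_weights = self.attention(features, hidden)
        ...  # the rest of the body is unchanged

The decoder is then invoked as decoder((x_input, features, hidden)) with one tuple argument instead of three positional arguments.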
I am training a DCGAN model with tensorflow.keras, and I added BatchNormalization layers to both the generator and the discriminator.
I train the GAN with the following steps:
1. train the discriminator on real images and on images from the generator (using generator.predict)
2. train the adversarial network (compiled with discriminator.trainable=False)
Then I found that after a few rounds the training loss returned by train_on_batch() goes to zero for both the generator and the discriminator. But when I use test_on_batch() the loss is still huge for the generator, and the generated images are all a mess.
At first I thought it was because in step 2 above, when training the adversarial network, the discriminator's input contains only fake images, so the batch normalization layers see a different distribution than in step 1, where both fake and real images are fed.
But even after I removed all batch normalization layers from the discriminator, the same problem persisted; it only disappeared once all batch normalization layers (generator included) were removed. I also found that the presence of Dropout layers makes no difference. I wonder why batch normalization can cause such a problem, even when the generator is fed noise drawn from the same distribution.
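As a standalone illustration of the suspected mechanism (not part of the model code below, and assuming TF 2.x eager execution): a BatchNormalization layer normalizes with the current batch's statistics in training mode but with its moving averages in inference mode, so the same weights can produce very different losses in train_on_batch and test_on_batch.

import numpy as np
import tensorflow as tf

bn = tf.keras.layers.BatchNormalization()
x = np.random.normal(loc=5.0, scale=2.0, size=(32, 4)).astype("float32")

bn(x, training=True)                         # builds the layer and updates the moving statistics once
print(bn(x, training=True).numpy().mean())   # close to 0: normalized with the batch statistics
print(bn(x, training=False).numpy().mean())  # still close to 5: the moving averages have barely moved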
# Model definition
# (imports inferred from the aliases used below; the original snippet did not show them)
import numpy as np
import tensorflow as tf
from tensorflow.keras import models as M, layers as L, activations as A
from tensorflow.keras import optimizers as Opt, losses as Lo
from PIL import Image

class DCGAN_128:
    def __init__(self, hidden_dim):
        generator = M.Sequential()
        generator.add(L.Dense(128 * 8 * 8, input_shape=[hidden_dim]))
        generator.add(L.Reshape([8, 8, 128]))
        generator.add(L.UpSampling2D())  # [8, 8, 128]
        generator.add(L.Conv2D(128, kernel_size=3, padding="same"))  # [16, 16, 128]
        generator.add(L.LayerNormalization())  # 4
        generator.add(L.ReLU())
        generator.add(L.UpSampling2D())  # [32, 32, 128]
        generator.add(L.Conv2D(64, kernel_size=5, padding="same"))  # [32, 32, 64]
        generator.add(L.LayerNormalization())  # 8
        generator.add(L.ReLU())
        generator.add(L.UpSampling2D())  # [64, 64, 128]
        generator.add(L.Conv2D(32, kernel_size=7, padding="same"))  # [64, 64, 32]
        generator.add(L.LayerNormalization())  # 12
        generator.add(L.ReLU())
        generator.add(L.UpSampling2D())  # [128, 128, 32]
        generator.add(L.Conv2D(3, kernel_size=3, padding="same", activation=A.sigmoid))  # [128, 128, 3]

        discriminator = M.Sequential()
        discriminator.add(L.Conv2D(32, kernel_size=5, strides=2, padding="same", input_shape=[128, 128, 3]))
        discriminator.add(L.LeakyReLU())
        # discriminator.add(L.Dropout(0.25))  # [64, 64, 32]
        discriminator.add(L.Conv2D(64, kernel_size=3, strides=2, padding="same"))
        # discriminator.add(L.BatchNormalization(epsilon=1e-5))  # 4
        discriminator.add(L.LeakyReLU())
        # discriminator.add(L.Dropout(0.25))  # [32, 32, 64]
        discriminator.add(L.Conv2D(128, kernel_size=3, strides=2, padding="same"))
        discriminator.add(L.LayerNormalization())  # 8
        discriminator.add(L.LeakyReLU())  # [16, 16, 128]
        discriminator.add(L.Dropout(0.25))
        discriminator.add(L.Conv2D(256, kernel_size=3, strides=2, padding="same"))
        discriminator.add(L.LayerNormalization())  # 12
        discriminator.add(L.LeakyReLU())  # [8, 8, 256]
        discriminator.add(L.Dropout(0.25))
        discriminator.add(L.Conv2D(512, kernel_size=3, strides=2, padding="same"))
        discriminator.add(L.LeakyReLU())  # [4, 4, 512]
        discriminator.add(L.Flatten())
        discriminator.add(L.Dense(1, activation=A.sigmoid))

        self.model_gen = generator
        self.model_dis = discriminator
        self.adv_input = L.Input([hidden_dim])
        self.adv_output = discriminator(generator(self.adv_input))
        self.model_adversarial = M.Model(self.adv_input, self.adv_output)
# Training
hidden_dim = 100
dcgan = DCGAN_128(hidden_dim)
data_loader = AnimeFacesLoader([128, 128])
batch_size = 32
n_rounds = 40000
dis_model = dcgan.model_dis
gen_model = dcgan.model_gen
adv_model = dcgan.model_adversarial
gen_model.summary()
adv_model.summary()
dis_model.compile(Opt.Adam(0.0002), Lo.binary_crossentropy)
dis_model.trainable = False
adv_model.compile(Opt.Adam(0.0002), Lo.binary_crossentropy)
layer_outputs = [layer.output for layer in dis_model.layers]
visual_model = tf.keras.Model(dis_model.input, layer_outputs)
for rounds in range(n_rounds):
    # Get output images
    if rounds % 100 == 0 and rounds > 0:
        noise = np.random.uniform(-1, 1, [16, hidden_dim])
        tiled_images = np.zeros([4 * 128, 4 * 128, 3]).astype(np.uint8)
        generated_imgs = gen_model.predict(noise)
        generated_imgs *= 256
        generated_imgs = generated_imgs.astype(np.uint8)
        for i in range(16):
            tiled_images[int(i / 4) * 128: int(i / 4) * 128 + 128,
                         int(i % 4) * 128: int(i % 4) * 128 + 128, :] = generated_imgs[i, :, :, :]
        Image.fromarray(tiled_images).save("Output/DCGAN/" + "rounds_{0}.jpg".format(rounds))
    '''
    layer_visualization = visual_model.predict(generated_imgs[:1])
    for i in range(len(layer_visualization)):
        plt.imshow(layer_visualization[i][0, :, :, 0])
        plt.show()
    '''
    # train discriminator on real & fake images
    real_imgs = data_loader.get_batch(batch_size)
    real_ys = np.ones([batch_size, 1])
    noise = np.random.uniform(-1, 1, [batch_size, hidden_dim])
    fake_ys = np.zeros([batch_size, 1])
    fake_imgs = gen_model.predict(noise)
    imgs = np.concatenate([real_imgs, fake_imgs], axis=0)
    ys = np.concatenate([real_ys, fake_ys], axis=0)
    loss_dis = dis_model.train_on_batch(imgs, ys)
    print("Round {}, Loss dis:{:.4f}".format(rounds, loss_dis))
    loss_dis_test = dis_model.test_on_batch(imgs, ys)
    print(loss_dis_test)

    noise = np.random.uniform(-1, 1, [batch_size, hidden_dim])
    fake_ys = np.ones([batch_size, 1])
    loss_gen = adv_model.train_on_batch(noise, fake_ys)
    print("Round {}, Loss gen:{:.4f}".format(rounds, loss_gen))
    loss_gen_test = adv_model.test_on_batch(noise, fake_ys)
    print(loss_gen_test)
I've looked at some simple CNNs from this tutorial video https://www.youtube.com/watch?v=hSDrJM6fJFM and modified the code so that it uses a MomentumOptimizer instead of AdamOptimizer, and so that training runs indefinitely instead of stopping at 1000 iterations. Around i = 9500 the accuracy always drops to 0.085 and remains stuck there permanently, never changing afterward. I am very confused about what could cause this: training progresses fine, gradually increasing the accuracy, then it suddenly drops to worse than random guessing and stays there forever.
from __future__ import print_function
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# number 1 to 10 data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
def compute_accuracy(v_xs, v_ys):
    global prediction
    y_pre = sess.run(prediction, feed_dict={xs: v_xs, keep_prob: 1})
    correct_prediction = tf.equal(tf.argmax(y_pre, 1), tf.argmax(v_ys, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    result = sess.run(accuracy, feed_dict={xs: v_xs, ys: v_ys, keep_prob: 1})
    return result

def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    # stride [1, x_movement, y_movement, 1]
    # Must have strides[0] = strides[3] = 1
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    # stride [1, x_movement, y_movement, 1]
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
# define placeholder for inputs to network
xs = tf.placeholder(tf.float32, [None, 784])/255. # 28x28
ys = tf.placeholder(tf.float32, [None, 10])
keep_prob = tf.placeholder(tf.float32)
x_image = tf.reshape(xs, [-1, 28, 28, 1])
# print(x_image.shape) # [n_samples, 28,28,1]
## conv1 layer ##
W_conv1 = weight_variable([5,5, 1,32]) # patch 5x5, in size 1, out size 32
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1) # output size 28x28x32
h_pool1 = max_pool_2x2(h_conv1) # output size 14x14x32
## conv2 layer ##
W_conv2 = weight_variable([5,5, 32, 64]) # patch 5x5, in size 32, out size 64
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) # output size 14x14x64
h_pool2 = max_pool_2x2(h_conv2) # output size 7x7x64
## fc1 layer ##
W_fc1 = weight_variable([7*7*64, 1024])
b_fc1 = bias_variable([1024])
# [n_samples, 7, 7, 64] ->> [n_samples, 7*7*64]
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
## fc2 layer ##
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
prediction = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
# the error between prediction and real data
cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(prediction),reduction_indices=[1])) # loss
train_step = tf.train.MomentumOptimizer(1e-4, 0.999).minimize(cross_entropy)
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
for i in range(100000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={xs: batch_xs, ys: batch_ys, keep_prob: 0.5})
    if i % 50 == 0:
        print(i, compute_accuracy(mnist.test.images[:1000], mnist.test.labels[:1000]))
This is the code I've run. I'd really appreciate any help solving this mystery, thank you.
This question has been asked several times already, but I don't seem to be able to adapt previous solutions to my code, so I would appreciate any advice on how to solve this. I have tried using pdb and setting a trace point right before the problem, which didn't give me much information.
I am adapting this tutorial to my problem:
https://www.oreilly.com/ideas/visualizing-convolutional-neural-networks
Data Shape:
x_train.shape: (1161, 68, 68, 1)
x_test.shape: (216, 68, 68, 1)
y_test.shape: (216,)
y_train.shape: (1161,)
Where the error occurs:
#Train the Model
steps = int(x_train.shape[0]/batchSize)
for i in range(numEpochs):
    print(i)
    accHist = []
    accHist2 = []
    #x_train, y_train = imf.shuffle(x_train, y_train)
    for j in range(steps):
        print(j)
        #Calculate our current step
        step = i * steps + j
        #Feed forward batch of train images into graph and log accuracy
        acc = sess.run([accuracy], feed_dict={X: x_train[(j*batchSize):((j+1)*batchSize),:,:,:], Y_: np.array(y_train[(j*batchSize):((j+1)*batchSize)]).reshape(1,30), keepRate1: 1, keepRate2: 1})
        print(accHist)
        accHist.append(acc)
        #Back propagate using adam optimizer to update weights and biases.
        sess.run(train_step, feed_dict={X: x_train[(j*batchSize):((j+1)*batchSize),:,:,:], Y_: np.array(y_train[(j*batchSize):((j+1)*batchSize)]).reshape(1,30), keepRate1: 0.2, keepRate2: 0.5})
        print("success")
    print('Epoch number {} Training Accuracy: {}'.format(i+1, np.mean(accHist)))
    #Feed forward all test images into graph and log accuracy
    for k in range(int(x_test.shape[0]/batchSize)):
        acc = sess.run(accuracy, feed_dict={X: x_test[(k*batchSize):((k+1)*batchSize),:,:,:], Y_: np.array(y_test[(k*batchSize):((k+1)*batchSize)]).reshape(1,30), keepRate1: 1, keepRate2: 1})
        accHist2.append(acc)
    print("Test Set Accuracy: {}".format(np.mean(accHist2)))
I am getting the following error message:
InvalidArgumentError: logits and labels must be same size: logits_size=[30,30] labels_size=[1,30]
[[Node: cross_entropy_7/SoftmaxCrossEntropyWithLogits = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](cross_entropy_7/Reshape, cross_entropy_7/Reshape_1)]]
Following the tutorial, I thought the logits were set here:
#FULLY CONNECTED 3 & SOFTMAX OUTPUT
with tf.name_scope('softmax') as scope:
    fc2w = tf.Variable(tf.truncated_normal([512, classes], dtype=tf.float32,
                                           stddev=1e-1), name='weights3_2')
    fc2b = tf.Variable(tf.constant(1.0, shape=[classes], dtype=tf.float32),
                       trainable=True, name='biases3_2')
    Ylogits = tf.nn.bias_add(tf.matmul(fc1_drop, fc2w), fc2b)
    Y = tf.nn.softmax(Ylogits)
print(Ylogits.shape) here gives me (?, 30). classes is set to 30, so this seems to make sense.
This seems to be the function that doesn't work, so I printed the shapes:
with tf.name_scope('cross_entropy'):
    print(Ylogits.shape)
    print(Y.shape)
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits, labels=Y_)
    loss = tf.reduce_mean(cross_entropy)
Which gave me:
(?, 30)
(?, 30)
When executing the backpropagation line above, though, this does not seem to work. Can anyone help?
In response to a comment (this is basically the tutorial code from the link mentioned above):
Place Holders:
classes = 30
X = tf.placeholder(tf.float32, name="X-placeholder", shape=(None, 68, 68, 1))
Y_ = tf.placeholder(tf.float32, [None, classes], name="Y_-placeholder")
keepRate1 = tf.placeholder(tf.float32, name="keepRate1-placeholder")
keepRate2 = tf.placeholder(tf.float32, name="keepRate2-placeholder")
Model:
# CONVOLUTION 1 - 1
with tf.name_scope('conv1_1'):
    filter1_1 = tf.Variable(tf.truncated_normal([3, 3, 1, 32], dtype=tf.float32,
                                                stddev=1e-1), name='weights1_1')
    stride = [1,1,1,1]
    conv = tf.nn.conv2d(X, filter1_1, stride, padding='SAME')
    biases = tf.Variable(tf.constant(0.0, shape=[32], dtype=tf.float32),
                         trainable=True, name='biases1_1')
    out = tf.nn.bias_add(conv, biases)
    conv1_1 = tf.nn.relu(out)

# CONVOLUTION 1 - 2
with tf.name_scope('conv1_2'):
    filter1_2 = tf.Variable(tf.truncated_normal([3, 3, 32, 32], dtype=tf.float32,
                                                stddev=1e-1), name='weights1_2')
    conv = tf.nn.conv2d(conv1_1, filter1_2, [1,1,1,1], padding='SAME')
    biases = tf.Variable(tf.constant(0.0, shape=[32], dtype=tf.float32),
                         trainable=True, name='biases1_2')
    out = tf.nn.bias_add(conv, biases)
    conv1_2 = tf.nn.relu(out)

# POOL 1
with tf.name_scope('pool1'):
    pool1_1 = tf.nn.max_pool(conv1_2,
                             ksize=[1, 2, 2, 1],
                             strides=[1, 2, 2, 1],
                             padding='SAME',
                             name='pool1_1')
    pool1_1_drop = tf.nn.dropout(pool1_1, keepRate1)

# CONVOLUTION 2 - 1
with tf.name_scope('conv2_1'):
    filter2_1 = tf.Variable(tf.truncated_normal([3, 3, 32, 64], dtype=tf.float32,
                                                stddev=1e-1), name='weights2_1')
    conv = tf.nn.conv2d(pool1_1_drop, filter2_1, [1, 1, 1, 1], padding='SAME')
    biases = tf.Variable(tf.constant(0.0, shape=[64], dtype=tf.float32),
                         trainable=True, name='biases2_1')
    out = tf.nn.bias_add(conv, biases)
    conv2_1 = tf.nn.relu(out)

# CONVOLUTION 2 - 2
with tf.name_scope('conv2_2'):
    filter2_2 = tf.Variable(tf.truncated_normal([3, 3, 64, 64], dtype=tf.float32,
                                                stddev=1e-1), name='weights2_2')
    conv = tf.nn.conv2d(conv2_1, filter2_2, [1, 1, 1, 1], padding='SAME')
    biases = tf.Variable(tf.constant(0.0, shape=[64], dtype=tf.float32),
                         trainable=True, name='biases2_2')
    out = tf.nn.bias_add(conv, biases)
    conv2_2 = tf.nn.relu(out)

# POOL 2
with tf.name_scope('pool2'):
    pool2_1 = tf.nn.max_pool(conv2_2,
                             ksize=[1, 2, 2, 1],
                             strides=[1, 2, 2, 1],
                             padding='SAME',
                             name='pool2_1')
    pool2_1_drop = tf.nn.dropout(pool2_1, keepRate1)

#FULLY CONNECTED 1
with tf.name_scope('fc1') as scope:
    shape = int(np.prod(pool2_1_drop.get_shape()[1:]))
    fc1w = tf.Variable(tf.truncated_normal([shape, 512], dtype=tf.float32,
                                           stddev=1e-1), name='weights3_1')
    fc1b = tf.Variable(tf.constant(1.0, shape=[512], dtype=tf.float32),
                       trainable=True, name='biases3_1')
    pool2_flat = tf.reshape(pool2_1_drop, [-1, shape])
    out = tf.nn.bias_add(tf.matmul(pool2_flat, fc1w), fc1b)
    fc1 = tf.nn.relu(out)
    fc1_drop = tf.nn.dropout(fc1, keepRate2)

#FULLY CONNECTED 3 & SOFTMAX OUTPUT
with tf.name_scope('softmax') as scope:
    fc2w = tf.Variable(tf.truncated_normal([512, classes], dtype=tf.float32,
                                           stddev=1e-1), name='weights3_2')
    fc2b = tf.Variable(tf.constant(1.0, shape=[classes], dtype=tf.float32),
                       trainable=True, name='biases3_2')
    Ylogits = tf.nn.bias_add(tf.matmul(fc1_drop, fc2w), fc2b)
    Y = tf.nn.softmax(Ylogits)

numEpochs = 400
batchSize = 30
alpha = 1e-5

with tf.name_scope('cross_entropy'):
    print(Ylogits.shape)
    print(Y.shape)
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits, labels=Y_)
    loss = tf.reduce_mean(cross_entropy)

with tf.name_scope('accuracy'):
    correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

with tf.name_scope('train'):
    train_step = tf.train.AdamOptimizer(learning_rate=alpha).minimize(loss)
#Create Session and insert variables
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
The tensor shape (?, 30) denotes that the batch size is not set, so you can feed data of any batch size to your graph. The downside is that you can then run into these kinds of problems and need to keep track of the tensor shapes in your head.
The thing you need to fix: you have 30 images in one batch but only 1 label for that batch, and you cannot compute the loss for 30 images with only one label. You either need to decrease the number of images to 1 or increase the label batch size to 30; it could also be that you are reshaping the tensors incorrectly somewhere.
I would look at where you read in your data and then batch it, as that is most likely where the problem is, or at the places where you reshape the tensors.
Post your entire code; it would be more helpful.
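If the (1161,) and (216,) label shapes above mean that y_train and y_test hold integer class ids, a rough sketch of the kind of fix this points to (hypothetical, since the data-loading code isn't shown) is to one-hot encode each label batch to shape (batchSize, classes) instead of reshaping it to (1, 30):

# hypothetical fix, assuming y_train contains integer class ids in [0, classes)
batch_x = x_train[(j*batchSize):((j+1)*batchSize), :, :, :]                    # (30, 68, 68, 1)
batch_labels = np.array(y_train[(j*batchSize):((j+1)*batchSize)]).astype(int)  # (30,)
batch_onehot = np.eye(classes)[batch_labels]                                   # (30, 30), one row per image
acc = sess.run(accuracy, feed_dict={X: batch_x, Y_: batch_onehot,
                                    keepRate1: 1, keepRate2: 1})

With that change the labels fed to softmax_cross_entropy_with_logits have the same (30, 30) shape as the logits.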
I am attempting to recreate a CNN from a research paper, but I am still new to deep learning.
I am given a 3D patch of size 32x32x7. I first want to perform a convolution of size 3x3 with 32 features and a stride of 2. Then, from that result, I need to perform a 3x3x4 convolution with 64 features and a stride of 1. I do not want to pool or have an activation function between the two convolutions. Why can't I just feed the results of my first convolution into the second one?
import tensorflow as tf

sess = tf.InteractiveSession()

def conv3d(tempX, tempW):
    return tf.nn.conv3d(tempX, tempW, strides=[2, 2, 2, 2, 2],
                        padding='SAME')

def conv3d_s1(tempX, tempW):
    return tf.nn.conv3d(tempX, tempW, strides=[1, 1, 1, 1, 1],
                        padding='SAME')

def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

x = tf.placeholder(tf.float32, shape=[None, 7168])
y_ = tf.placeholder(tf.float32, shape=[None, 3])
W = tf.Variable(tf.zeros([7168, 3]))

#first convolution
W_conv1 = weight_variable([3, 3, 1, 1, 32])
x_image = tf.reshape(x, [-1, 32, 32, 7, 1])
h_conv1 = conv3d(x_image, W_conv1)

#second convolution
W_conv2 = weight_variable([3, 3, 4, 1, 64])
h_conv2 = conv3d_s1(h_conv1, W_conv2)
Thank you!
After the first conv3d you have a tensor with shape [None, 16, 16, 4, 32], so you have to use a kernel of shape [3, 3, 4, 32, 64] in the second conv3d_s1.
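A minimal sketch of that change against the question's code (only the kernel shape differs; everything else is assumed unchanged):

#second convolution
W_conv2 = weight_variable([3, 3, 4, 32, 64])  # in_channels must be 32 to match h_conv1's last dimension
h_conv2 = conv3d_s1(h_conv1, W_conv2)         # resulting shape: [None, 16, 16, 4, 64]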