Matrix size-incompatible for custom multi model - tensorflow

I am getting the following error:
Node: 'BGNet/dense/BiasAdd'
Matrix size-incompatible: In[0]: [1120,0], In[1]: [2048,1024]
[[{{node BGNet/dense/BiasAdd}}]] [Op:__inference_train_function_11676]
I found the root in this part of the model:
File "<ipython-input-14-3dcbdf5337b8>", line 69, in call
f = self.dense(f)
This is my custom multi model:
class BGNet(tf.keras.Model):
    """Shared convolutional trunk followed by three prediction heads.

    Seven ``BGBlock`` stages feed a flatten -> dense -> dropout stem. From
    that shared feature vector ``call`` produces, in this order:

    * ``prob``  - one sigmoid unit,
    * ``bbox``  - four values, computed from the shared features
      concatenated with ``prob``,
    * ``class`` - a softmax over ``len(classes)`` units, computed from the
      shared features concatenated with ``bbox``.
    """

    def __init__(self, img_h, img_w, img_c, batch_size, classes):
        super().__init__(name='BGNet')
        self.img_h, self.img_w, self.img_c = img_h, img_w, img_c
        self.batch_size = batch_size
        self.classes = classes

        keras_layers = tf.keras.layers

        # The shape comments below are the author's expected activation
        # shapes for a (224, 224, 3) input image.
        # (224, 224, 3)
        self.bgblock0 = BGBlock(f=[32, 32, 32, 32], k=[7, 5, 5, 5],
                                d=[1, 2, 2, 1], stage=0)
        # (112, 112, 32)
        self.bgblock1 = BGBlock(f=[64, 64, 64, 64], k=[5, 5, 5, 3],
                                d=[2, 1, 1, 2], stage=1)
        # (56, 56, 64)
        self.bgblock2 = BGBlock(f=[128, 128, 128, 128], k=[5, 5, 3, 3],
                                d=[2, 1, 2, 1], stage=2)
        # (28, 28, 128)
        self.bgblock3 = BGBlock(f=[256, 256, 256, 256], k=[5, 3, 3, 3],
                                d=[1, 2, 1, 2], stage=3)
        # (14, 14, 256)
        self.bgblock4 = BGBlock(f=[512, 512, 512], k=[3, 3, 3],
                                d=[1, 1, 2], stage=4)
        # (7, 7, 512)
        self.bgblock5 = BGBlock(f=[1024, 1024, 1024], k=[3, 3, 1],
                                d=[2, 1, 1], stage=5)
        # (4, 4, 1024)
        self.bgblock6 = BGBlock(f=[2048, 2048], k=[1, 1],
                                d=[1, 2], stage=6)
        # (2, 2, 2048)

        # Shared stem.
        self.flatten = keras_layers.Flatten(name='flatten')
        self.dense = keras_layers.Dense(1024, activation='tanh', name='dense')
        self.dropout = keras_layers.Dropout(0.2, name='dropout')

        # Objectness head.
        self.prob = keras_layers.Dense(1, activation='sigmoid', name='prob')

        # Bounding-box head (consumes shared features + prob).
        self.concat1 = keras_layers.Concatenate(axis=-1, name='concat1')
        self.bbox1 = keras_layers.Dense(512, activation='relu', name='bbox1')
        self.bbox2 = keras_layers.Dropout(0.1, name='bbox2')
        self.bbox3 = keras_layers.Dense(256, activation='sigmoid', name='bbox3')
        self.bbox = keras_layers.Dense(4, name='bbox')

        # Class head (consumes shared features + bbox).
        self.concat2 = keras_layers.Concatenate(axis=-1, name='concat2')
        self.cat = keras_layers.Dense(len(self.classes), activation='softmax',
                                      name='cat')

    def call(self, input_tensor, training=True):
        """Run the trunk and the three heads; return a dict of outputs."""
        trunk = (self.bgblock0, self.bgblock1, self.bgblock2, self.bgblock3,
                 self.bgblock4, self.bgblock5, self.bgblock6)
        features = input_tensor
        for stage in trunk:
            features = stage(features)

        shared = self.flatten(features)
        shared = self.dense(shared)
        shared = self.dropout(shared)

        prob = self.prob(shared)

        box = self.concat1([shared, prob])
        for head_layer in (self.bbox1, self.bbox2, self.bbox3, self.bbox):
            box = head_layer(box)

        category = self.cat(self.concat2([shared, box]))
        return {'prob': prob, 'bbox': box, 'class': category}
# Instantiate the network; H, W, C, B and N are defined outside this snippet.
model1 = BGNet(H, W, C, B, N)
# Create the weights for a fixed (batch, height, width, channels) input.
model1.build(input_shape=(B, H, W, C))
# Trace call() once on a symbolic input - presumably so that summary() can
# report concrete output shapes (TODO confirm).
model1.call(tf.keras.layers.Input(shape=(H, W, C), batch_size=B))
model1.summary(print_fn=tf.print, expand_nested=True, show_trainable=True)
The custom (BGBlocks) blocks are not that important but if you are curious they are convolution blocks consisting of conv2d, batchnorm, activation and pooling layers
The model produces three output vectors of different sizes that share the first dense layers. The output layers first predict the confidence score (prob in loss) of an object being in the image. Next they predict the bounding box (bbox in loss) and finally the class (class in loss) of the bounded object.
The main issue is after the flatten layer. The model builds without errors with input images of (224, 224, 3). This is how the summary of the model looks: model.summary() image
I have even created a custom IOU (Intersection Over Union) for bounding boxes to be used as model metric. The losses are simple, inbuilt and as follows:
loss = {'prob': 'binary_crossentropy', 'bbox': 'mse', 'class': 'categorical_crossentropy'}
How can I resolve this error?

Related

Is there a way to divide the keras mobilenetv2 model into submodels?

I am trying to divide the mobilenetv2 model into 2 parts.
I first want to run the first part of the model, save the output, and feed it later on to the second model for certain reasons. I've tried code found here,
but I get the following error:
ValueError: A merge layer should be called on a list of inputs.
I think it is because the model isn't a Sequential.
Can someone help?
As I mentioned in my comments, some layers in mobile_net_v2 expect more than one input, namely the outputs of several previous layers. Therefore adding them individually to a sequential model causes errors. I have an alternative solution for you. Using the mobile_net_v2 implementation (of my own) in this link, I was able to create the models you want:
import tensorflow as tf
from tensorflow.keras import layers, Model, Sequential
def conv_block(input_tensor, c, s, t, expand=True):
    """
    Inverted-residual block used to assemble MobileNetV2.

    Applies an optional 1x1 expansion convolution, a 3x3 depthwise
    convolution with stride ``s`` and a 1x1 projection to ``c`` channels.
    Every convolution is followed by batch normalisation; ReLU6 follows the
    expansion and depthwise stages only.

    Args:
        input_tensor (keras tensor): input tensor
        c (int): number of output channels
        s (int): stride of the depthwise convolution
        t (int): expansion factor applied to the input channel count
        expand (bool): whether to run the 1x1 expansion stage
    Returns: keras tensor; the input is added back to the result when
        ``s == 1`` and the input and output shapes are equal.
    """
    in_channels = input_tensor.get_shape()[-1]

    x = input_tensor
    if expand:
        x = layers.Conv2D(filters=in_channels * t, kernel_size=1, strides=1,
                          padding='same', use_bias=False)(x)
        x = layers.BatchNormalization()(x)
        x = layers.ReLU(6.0)(x)

    x = layers.DepthwiseConv2D(kernel_size=3, strides=s, padding='same',
                               depth_multiplier=1, use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU(6.0)(x)

    # Linear bottleneck: projection without an activation.
    x = layers.Conv2D(filters=c, kernel_size=1, strides=1,
                      padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)

    shapes_match = input_tensor.get_shape() == x.get_shape()
    if shapes_match and s == 1:
        return x + input_tensor
    return x
def splitted_model(input_shape=(224,224,3)):
    """Build a MobileNetV2-style backbone as two chained Keras models.

    Args:
        input_shape (tuple): shape of one input image, without the batch axis.

    Returns:
        (model_f, model_h): ``model_f`` maps an image to an intermediate
        feature map; ``model_h`` takes a tensor with that feature map's
        shape and ends in global average pooling.
    """
    # (output channels, stride, expansion factor) for each conv_block.
    first_half_specs = [
        (24, 2, 6), (24, 1, 6),
        (32, 2, 6), (32, 1, 6), (32, 1, 6),
        (64, 2, 6), (64, 1, 6), (64, 1, 6), (64, 1, 6),
    ]
    second_half_specs = [
        (96, 1, 6), (96, 1, 6), (96, 1, 6),
        (160, 2, 6), (160, 1, 6), (160, 1, 6),
        (320, 1, 6),
    ]

    # ---- first model: stem + early blocks ----
    image_in = layers.Input(shape=input_shape)
    x = layers.Conv2D(32, 3, 2, padding='same', use_bias=False)(image_in)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU(6.0)(x)
    x = conv_block(x, 16, 1, 1, expand=False)
    for channels, stride, expansion in first_half_specs:
        x = conv_block(x, channels, stride, expansion)
    model_f = Model(inputs=image_in, outputs=x)

    # ---- second model: starts from a fresh Input shaped like model_f's output ----
    feature_in = layers.Input(shape=(x.shape[1:]))
    x = feature_in
    for channels, stride, expansion in second_half_specs:
        x = conv_block(x, channels, stride, expansion)
    x = layers.Conv2D(1280, 1, 1, padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU(6.0)(x)
    x = layers.GlobalAveragePooling2D()(x)
    model_h = Model(inputs=feature_in, outputs=x)

    return model_f, model_h
You could create your two models as such:
# Square RGB input; both sub-models are built for this image shape.
IMG_SIZE = 160
IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)
model_f, model_h = splitted_model(input_shape=IMG_SHAPE)
Note that the weights are randomly initialized. If you want to have the weights from mobilenet_v2 trained on imagenet, you could run the following code to copy weights:
# Copy ImageNet weights from the stock Keras MobileNetV2 into model_f, and
# once model_f's layers are exhausted, into model_h.
# NOTE(review): the indentation of this snippet was lost in extraction; the
# nesting must be restored before it can run. The statements are kept verbatim.
mobile_net = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
include_top=False,
weights='imagenet')
# Cursors into model_f.layers / model_h.layers.
layer_f_counter = 0
layer_h_counter = 0
for i in range(len(mobile_net.layers)):
# First phase: model_f still has layers left to visit.
if layer_f_counter<len(model_f.layers):
if len(mobile_net.layers[i].get_weights()) > 0:
if len(model_f.layers[layer_f_counter].get_weights()) > 0:
print(mobile_net.layers[i].name,'here', model_f.layers[layer_f_counter].name, layer_f_counter)
model_f.layers[layer_f_counter].set_weights(mobile_net.layers[i].get_weights())
layer_f_counter += 1
print(layer_f_counter)
else:
# Source layer has no weights: only advance the target cursor when the
# target layer is weightless as well.
if len(model_f.layers[layer_f_counter].get_weights()) > 0:
continue
else:
layer_f_counter+=1
else:
# Second phase: same procedure against model_h.
if layer_h_counter<len(model_h.layers):
if len(mobile_net.layers[i].get_weights()) > 0:
if len(model_h.layers[layer_h_counter].get_weights()) > 0:
print(mobile_net.layers[i].name,'here', model_h.layers[layer_h_counter].name, layer_h_counter)
model_h.layers[layer_h_counter].set_weights(mobile_net.layers[i].get_weights())
layer_h_counter += 1
print(layer_h_counter)
else:
if len(model_h.layers[layer_h_counter].get_weights()) > 0:
continue
else:
layer_h_counter+=1
It iterates through the layers of mobilenet_v2 loaded from Keras, copying the weights of the first part to model_f and the rest to model_h. You can check that the weights were copied correctly by printing the weights of some layer from mobile_net and from the new models, as follows:
print(model_f.layers[1].get_weights()) # weights of the first conv layer in model_f
print(mobile_net.get_layer('Conv1').get_weights()) # weights of the first conv layer in mobile_net
Also for model_h:
print(model_h.layers[-4].get_weights()) # weights of the last conv layer in model_h
print(mobile_net.get_layer('Conv_1').get_weights()) # weights of the last conv layer in mobile_net
Note that I arbitrarily chose the block at which mobile_net is split into model_f and model_h; you can edit the code to change where the split happens. Hope it helps.

input_shape not recognised in Keras model

I am trying to use Tensorflow's 2.0 new MirroredStrategy but I am receiving an error saying:
ValueError: We currently do not support distribution strategy with a `Sequential` model that is created without `input_shape`/`input_dim` set in its first layer or a subclassed model.
Model:
class Model(kr.Model):
    """Subclassed model: one-hot encodes the last input channel, then runs a
    Sequential stack of Conv2D/MaxPool2D pairs, Flatten and Dense."""

    def __init__(self, input_shape, conv_sizes, num_outputs):
        super().__init__('model_1')
        self.num_outputs = num_outputs
        rows, cols, depth = input_shape

        self.one_hot = kl.Lambda(
            lambda x: tf.one_hot(tf.cast(x, 'int32'), num_outputs),
            input_shape=(rows, cols))
        self.concat = kl.Concatenate(axis=-1)

        stack = []
        for index, (filters, kernel, stride) in enumerate(conv_sizes):
            conv_kwargs = {'activation': 'relu'}
            if index == 0:
                # The one-hot expansion replaces one channel by num_outputs.
                depth += num_outputs - 1
                conv_kwargs['input_shape'] = (rows, cols, depth)
            stack.append(kl.Conv2D(filters, kernel, stride, **conv_kwargs))
            stack.append(kl.MaxPool2D(pool_size=(2, 2)))
        stack.append(kl.Flatten())
        stack.append(kl.Dense(num_outputs))

        self.net = kr.Sequential(stack)
        self.build(input_shape=(None, ) + input_shape)

    def call(self, inputs):
        """Split off the last channel, one-hot it, re-attach and classify."""
        last_channel = inputs[:, :, :, -1]
        encoded = self.one_hot(last_channel)
        remaining = inputs[:, :, :, :-1]
        merged = self.concat([remaining, encoded])
        return self.net(merged)
Reproduction code:
# Constructor arguments for Model: three (filters, kernel, stride) conv specs,
# a (rows, cols, depth) input shape and the number of outputs.
model_args = {'conv_sizes': [(32, (2, 2), 1), (32, (2, 2), 1), (32, (2, 2), 1)],
'input_shape': (50, 50, 6),
'num_outputs': 5}
def dummy_loss(values, targets):
    """Sum the element-wise difference ``values - targets`` over the last axis."""
    difference = values - targets
    return tf.reduce_sum(difference, axis=-1)
# Create and compile the model under a MirroredStrategy; the compile call is
# the one that raises the ValueError shown in the traceback.
# NOTE(review): indentation was lost in extraction; restore the nesting under
# `with` before running.
mirrored_strategy = tf.distribute.MirroredStrategy()
with mirrored_strategy.scope():
model = Model(**model_args)
model.compile(optimizer=kr.optimizers.Adam(learning_rate=0.01), loss=dummy_loss)
Output:
Traceback (most recent call last):
File "/home/joao/anaconda3/envs/tf2/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3296, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-4-dc492e7c638b>", line 18, in <module>
model.compile(optimizer=kr.optimizers.Adam(learning_rate=0.01), loss=dummy_loss)
File "/home/joao/anaconda3/envs/tf2/lib/python3.6/site-packages/tensorflow/python/training/tracking/base.py", line 456, in _method_wrapper
result = method(self, *args, **kwargs)
File "/home/joao/anaconda3/envs/tf2/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py", line 263, in compile
'We currently do not support distribution strategy with a '
ValueError: We currently do not support distribution strategy with a `Sequential` model that is created without `input_shape`/`input_dim` set in its first layer or a subclassed model.
Model Summary (model.summary()):
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lambda (Lambda) multiple 0
_________________________________________________________________
concatenate (Concatenate) multiple 0
_________________________________________________________________
sequential (Sequential) (None, 5) 13573
=================================================================
Total params: 13,573
Trainable params: 13,573
Non-trainable params: 0
I would do away with the Sequential approach and use the Model class directly:
def create_model(input_shape, conv_sizes, fc_sizes, num_outputs):
    """Functional-API model: one-hot the last input channel, then conv/pool
    stages, Flatten and a Dense output layer.

    Args:
        input_shape: (rows, cols, depth) of one sample.
        conv_sizes: iterable of (filters, kernel, stride), one per conv stage.
        fc_sizes: accepted but not used by this function.
        num_outputs: one-hot depth and size of the final Dense layer.

    Returns:
        A ``kr.Model`` from the input layer to the Dense output.
    """
    rows, cols, depth = input_shape
    input_layer = kl.Input(shape=(rows, cols, depth))

    # Separate the last channel from the rest.
    actions = tf.slice(input_layer, [0, 0, 0, depth - 1], [-1, rows, cols, 1])
    non_actions = tf.slice(input_layer, [0, 0, 0, 0], [-1, rows, cols, depth - 1])

    to_one_hot = kl.Lambda(
        lambda x: tf.one_hot(tf.cast(x, 'int32'), num_outputs),
        input_shape=(rows, cols))
    one_hot = to_one_hot(actions)
    # Collapse the singleton channel axis left over from the slice.
    one_hot = tf.reshape(one_hot, (-1, rows, cols, num_outputs))
    features = kl.Concatenate(axis=-1)([non_actions, one_hot])

    for filters, kernel, stride in conv_sizes:
        features = kl.Conv2D(filters, kernel, stride, activation='relu')(features)
        features = kl.MaxPool2D(pool_size=(2, 2))(features)

    flat = kl.Flatten()(features)
    logits = kl.Dense(num_outputs)(flat)
    return kr.Model(inputs=input_layer, outputs=[logits])

Error Applying Selu Activation function with tensorflow

I was trying to implement the new SELU activation function from https://arxiv.org/pdf/1706.02515. For more information here is my code:
import tensorflow as tf
import numpy as np
from PIL import Image
import os
from keras.activations import elu
batch_size = 32
def weight_variable(kernal_shape):
    """Get or create the float32 variable named 'weights' in the current
    variable scope, initialised from a truncated normal (stddev 0.02)."""
    initializer = tf.truncated_normal_initializer(stddev=0.02)
    return tf.get_variable(name='weights', shape=kernal_shape,
                           dtype=tf.float32, trainable=True,
                           initializer=initializer)
def bias_variable(shape):
    """Create a new variable of the given shape, initialised to zeros."""
    return tf.Variable(tf.constant(0.0, shape=shape))
def selu(x):
    """Scaled ELU: ``scale * elu(x, alpha)`` with fixed alpha and scale."""
    selu_alpha = 1.6732632423543772848170429916717
    selu_scale = 1.0507009873554804934193349852946
    # Alternative the author tried without Keras:
    # selu_scale * tf.where(x >= 0.0, x, selu_alpha * tf.exp(x) - selu_alpha)
    activated = elu(x, selu_alpha)
    return selu_scale * activated
def conv_layer(x, w_shape, b_shape, padding='SAME'):
    """Stride-2 convolution + bias + SELU, with histogram summaries.

    Args:
        x: input tensor for ``tf.nn.conv2d``.
        w_shape: kernel shape passed to ``weight_variable``.
        b_shape: bias shape passed to ``bias_variable``.
        padding: padding mode for the convolution.

    Returns:
        (activations, P), where P is the tensor
        ``((W2 - 1) * 2 - W1 + F) / 2`` with W1/W2 the sizes of axis 1 of
        the input/output and F the first kernel dimension.
    """
    kernel = weight_variable(w_shape)
    tf.summary.histogram("weights", kernel)
    bias = bias_variable(b_shape)
    tf.summary.histogram("biases", bias)

    # Stride 2 is used on purpose so that no max-pool layer is needed.
    convolved = tf.nn.conv2d(x, kernel, strides=[1, 2, 2, 1], padding=padding)
    activations = selu(convolved + bias)
    tf.summary.histogram(activations.name, activations)

    in_size = tf.shape(x)[1]
    out_size = tf.shape(activations)[1]
    kernel_size = w_shape[0]
    span = tf.multiply(tf.subtract(out_size, 1), 2)
    numerator = tf.add(tf.subtract(span, in_size), kernel_size)
    P = tf.divide(numerator, 2)
    return activations, P
def deconv_layer(x, w_shape, b_shape, padding="SAME"):
    """Stride-2 transposed convolution + bias, with histogram summaries.

    The output shape doubles axes 1 and 2 of ``x`` and takes its channel
    count from ``w_shape[2]``. No activation is applied.
    """
    kernel = weight_variable(w_shape)
    tf.summary.histogram("weights", kernel)
    bias = bias_variable(b_shape)
    tf.summary.histogram('biases', bias)

    in_shape = tf.shape(x)
    doubled_shape = tf.stack(
        [in_shape[0], in_shape[1] * 2, in_shape[2] * 2, w_shape[2]])

    # Stride 2 mirrors the stride-2 convolutions of conv_layer.
    upsampled = tf.nn.conv2d_transpose(
        x, kernel, doubled_shape, [1, 2, 2, 1], padding=padding)
    transposed_activations = upsampled + bias
    tf.summary.histogram(transposed_activations.name, transposed_activations)
    return transposed_activations
# Queue-based input pipeline: read one serialized example at a time from the
# TFRecords file and decode it into a float32 image with a leading batch axis.
tfrecords_filename_seq = ["P16_db.tfrecords"]
filename_queue = tf.train.string_input_producer(tfrecords_filename_seq, num_epochs=None, shuffle=False, name='queue')
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(
serialized_example,
# Defaults are not specified since all keys are required.
features={
'height': tf.FixedLenFeature([], tf.int64),
'width': tf.FixedLenFeature([], tf.int64),
'image_raw': tf.FixedLenFeature([], tf.string),
'annotation_raw': tf.FixedLenFeature([], tf.string)
})
# This is how we create one example, that is, extract one example from the database.
image = tf.decode_raw(features['image_raw'], tf.uint8)
# The height and the width are used to restore the image shape below.
height = tf.cast(features['height'], tf.int32)
width = tf.cast(features['width'], tf.int32)
# The image is reshaped since it is flattened when stored in binary format. Therefore, we need the
# height and the width to restore the original image.
image = tf.reshape(image, [height, width, 3])
# Wrapping in a list adds a leading axis of size 1 (a batch of one image).
image = tf.cast([image], tf.float32)
# Convolutional auto-encoder graph: four stride-2 conv layers, a 100-unit
# fully connected bottleneck, a fully connected layer back to 7*7*256, and
# four stride-2 transposed convolutions. The loss is the mean absolute
# difference between the reconstruction and the input image.
# NOTE(review): indentation was lost in extraction; restore the nesting under
# each `with` before running. The "image size" comments are the author's.
with tf.variable_scope('conv1'):
conv1, P1 = conv_layer(image, [3, 3, 3, 32], [32]) # image size: [56, 56]
with tf.variable_scope('conv2'):
conv2, P2 = conv_layer(conv1, [3, 3, 32, 64], [64]) # image size: [28, 28]
with tf.variable_scope('conv3'):
conv3, P3 = conv_layer(conv2, [3, 3, 64, 128], [128]) # image size: [14, 14]
with tf.variable_scope('conv4'):
conv4, P4 = conv_layer(conv3, [3, 3, 128, 256], [256]) # image size: [7, 7]
# Flatten the last feature map for the fully connected bottleneck.
conv4_reshaped = tf.reshape(conv4, [-1, 7 * 7 * 256], name='conv4_reshaped')
w_c = tf.Variable(tf.truncated_normal([7 * 7 * 256, 100], stddev=0.1), name='weight_fc')
b_c = tf.Variable(tf.constant(0.1, shape=[100]), name='biases_fc')
tf.summary.histogram('weights_c', w_c)
tf.summary.histogram('biases_c', b_c)
with tf.variable_scope('z'):
z = selu(tf.nn.bias_add(tf.matmul(conv4_reshaped, w_c), b_c))
tf.summary.histogram('features_z', z)
# Decoder: project the 100-d code back to 7*7*256 and reshape to a feature map.
w_dc = tf.Variable(tf.truncated_normal([100, 7 * 7 * 256], stddev=0.1), name='weights_dc')
b_dc = tf.Variable(tf.constant(0.1, shape=[7 * 7 * 256]), name='biases_dc')
tf.summary.histogram('weights_dc', w_dc)
tf.summary.histogram('biases_dc', b_dc)
with tf.variable_scope('deconv4'):
deconv4 = selu(tf.nn.bias_add(tf.matmul(z, w_dc), b_dc))
deconv4_reshaped = tf.reshape(deconv4, [-1, 7, 7, 256], name='deconv4_reshaped')
with tf.variable_scope('deconv3'):
deconv3 = deconv_layer(deconv4_reshaped, [3, 3, 128, 256], [128])
with tf.variable_scope('deconv2'):
deconv2 = deconv_layer(deconv3, [3, 3, 64, 128], [64])
with tf.variable_scope('deconv1'):
deconv1 = deconv_layer(deconv2, [3, 3, 32, 64], [32])
with tf.variable_scope('deconv_image'):
deconv_image = deconv_layer(deconv1, [3, 3, 3, 32], [3])
with tf.name_scope('loss'):
loss = tf.reduce_mean(tf.abs(deconv_image - image))
tf.summary.scalar('loss', loss)
with tf.name_scope('optimizer'):
optimizer = tf.train.AdamOptimizer(0.0001).minimize(loss)
init_op = tf.group(tf.local_variables_initializer(),
tf.global_variables_initializer())
model_path = 'C:/Users/iayou005/Documents/tensorboard_logs/Graph_model/ckpt'
# Here is the session...
with tf.Session() as sess:
train_writer = tf.summary.FileWriter('C:/Users/iayou005/Documents/tensorboard_logs/New_Runs/DeconvNet', sess.graph)
merged = tf.summary.merge_all()
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
step = 0
sess.run(init_op)
# Note that the last name "Graph_model" is the name of the saved checkpoints file => the ckpt is saved
# under tensorboard_logs.
ckpt = tf.train.get_checkpoint_state(
os.path.dirname('C:/Users/iayou005/Documents/tensorboard_logs/Graph_model/ckpt'))
if ckpt and ckpt.model_checkpoint_path:
saver.restore(sess, ckpt.model_checkpoint_path)
print('checkpoints are saved!!!')
else:
print('No stored checkpoints')
while step < 100000:
if step % 1000 == 0:
img = sess.run([deconv_image])
img2 = Image.fromarray(np.uint8(img[0][0]))
img2.save('Reconstructed' + str(step) + '.png', 'png')
__, loss_s, summary = sess.run([optimizer, loss, merged])
if step % 100 == 0:
train_writer.add_summary(summary, step)
print(loss_s)
step += 1
save_path = saver.save(sess, model_path)
coord.request_stop()
coord.join(threads)
train_writer.close()
So without using Keras I kept getting a NAN:
InvalidArgumentError (see above for traceback): Nan in summary histogram for: conv1/weights_1
[[Node: conv1/weights_1 = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](conv1/weights_1/tag, conv1/weights/read/_61)]]
I would like to know the reason for getting a NAN.
Any help is much appreciated!!

How to add more layers to Convolutional Neural Network text classification TensorFlow example?

According to the documentation, the model presented in this example is similar to the following paper:
"Character-level Convolutional Networks for Text Classification"
I found that the original model (presented in the paper) is 9 layers deep, with 6 convolutional layers and 3 fully-connected layers, but the implemented example contains only two convolutional layers:
# Two-layer character-level CNN quoted from the TensorFlow example:
# conv -> ReLU -> max-pool -> transpose, then conv -> max over axis 1.
# NOTE(review): indentation was lost in extraction; restore the nesting under
# each `with` before running.
with tf.variable_scope('CNN_Layer1'):
# Apply Convolution filtering on input sequence.
conv1 = tf.contrib.layers.convolution2d(
byte_list, N_FILTERS, FILTER_SHAPE1, padding='VALID')
# Add a RELU for non linearity.
conv1 = tf.nn.relu(conv1)
# Max pooling across output of Convolution+Relu.
pool1 = tf.nn.max_pool(
conv1,
ksize=[1, POOLING_WINDOW, 1, 1],
strides=[1, POOLING_STRIDE, 1, 1],
padding='SAME')
# Transpose matrix so that n_filters from convolution becomes width.
pool1 = tf.transpose(pool1, [0, 1, 3, 2])
with tf.variable_scope('CNN_Layer2'):
# Second level of convolution filtering.
conv2 = tf.contrib.layers.convolution2d(
pool1, N_FILTERS, FILTER_SHAPE2, padding='VALID')
# Max across each filter to get useful features for classification.
pool2 = tf.squeeze(tf.reduce_max(conv2, 1), squeeze_dims=[1])
Can anybody help me extend this model to more layers?
Similar to BVLC Caffenet :
def bvlc_caffenet(imgs, weights, biases, keep_prob=0.5):
    """Build a CaffeNet-style classifier graph and return class probabilities.

    Five VALID-padded convolutions (max-pooling after conv1, conv2 and
    conv5) are followed by three fully connected layers. fc6 and fc7 use
    ReLU and dropout; fc8 produces the logits that are fed to softmax.

    Fix over the original snippet: ReLU and dropout were also applied to
    the fc8 output, which clamps negative logits and randomly zeroes class
    scores before the softmax. fc8 now feeds the softmax directly.

    Args:
        imgs: image batch; a fixed per-channel mean of shape [1, 1, 1, 3]
            is subtracted from it.
        weights: dict with kernels 'c1'..'c5' and matrices 'f6'..'f8'.
        biases: dict with bias vectors 'b1'..'b8'.
        keep_prob: value passed as the second positional argument of
            ``tf.nn.dropout`` after fc6 and fc7 (default 0.5, the value the
            original hard-coded).

    Returns:
        Softmax probabilities computed from the fc8 logits.
    """
    def conv_relu(inputs, kernel_key, bias_key, stride):
        # VALID convolution + bias + ReLU.
        conv = tf.nn.conv2d(inputs, weights[kernel_key],
                            [1, stride, stride, 1], padding='VALID')
        return tf.nn.relu(tf.nn.bias_add(conv, biases[bias_key]))

    def max_pool(inputs):
        # 3x3 max-pooling with stride 2, no padding.
        return tf.nn.max_pool(inputs, ksize=[1, 3, 3, 1],
                              strides=[1, 2, 2, 1], padding='VALID')

    def fc_relu_dropout(inputs, matrix_key, bias_key):
        # Fully connected layer + ReLU + dropout.
        fc = tf.matmul(inputs, weights[matrix_key])
        out = tf.nn.relu(tf.nn.bias_add(fc, biases[bias_key]))
        return tf.nn.dropout(out, keep_prob)

    # mean subtraction
    mean = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32,
                       shape=[1, 1, 1, 3], name='img_mean')
    images = imgs - mean

    # conv1 .. conv5
    net = max_pool(conv_relu(images, 'c1', 'b1', 3))
    net = max_pool(conv_relu(net, 'c2', 'b2', 1))
    net = conv_relu(net, 'c3', 'b3', 1)
    net = conv_relu(net, 'c4', 'b4', 1)
    pool5 = max_pool(conv_relu(net, 'c5', 'b5', 1))

    # flattening
    shape = int(np.prod(pool5.get_shape()[1:]))
    pool5_flat = tf.reshape(pool5, [-1, shape])

    # fc6, fc7
    net = fc_relu_dropout(pool5_flat, 'f6', 'b6')
    net = fc_relu_dropout(net, 'f7', 'b7')

    # fc8: plain logits, no ReLU and no dropout before the softmax.
    logits = tf.nn.bias_add(tf.matmul(net, weights['f8']), biases['b8'])
    probs = tf.nn.softmax(logits)
    return probs
Initialized Weights and Biases for the Network
# Randomly initialised parameters for bvlc_caffenet: conv kernels are
# [height, width, in_channels, out_channels], fully connected matrices are
# [in_features, out_features].
# NOTE(review): the 4096 input size of 'f6' has to equal the flattened size
# of pool5, which depends on the input image resolution - verify.
weights = {
'c1': tf.Variable(tf.truncated_normal([7,7,3,96],stddev=0.1)),
'c2': tf.Variable(tf.truncated_normal([5,5,96,256],stddev=0.1)),
'c3': tf.Variable(tf.truncated_normal([3,3,256,384],stddev=0.1)),
'c4': tf.Variable(tf.truncated_normal([3,3,384,384],stddev=0.1)),
'c5': tf.Variable(tf.truncated_normal([3,3,384,256],stddev=0.1)),
'f6': tf.Variable(tf.truncated_normal([4096,2048],stddev=0.1)),
'f7': tf.Variable(tf.truncated_normal([2048,2048],stddev=0.1)),
'f8': tf.Variable(tf.truncated_normal([2048,1000],stddev=0.1))
}
# One bias vector per layer, all initialised to 0.1.
biases = {
'b1' : tf.Variable(tf.constant(0.1, shape=[96])),
'b2' : tf.Variable(tf.constant(0.1, shape=[256])),
'b3' : tf.Variable(tf.constant(0.1, shape=[384])),
'b4' : tf.Variable(tf.constant(0.1, shape=[384])),
'b5' : tf.Variable(tf.constant(0.1, shape=[256])),
'b6' : tf.Variable(tf.constant(0.1, shape=[2048])),
'b7' : tf.Variable(tf.constant(0.1, shape=[2048])),
'b8' : tf.Variable(tf.constant(0.1, shape=[1000]))
}
Or
follow this (another format) : https://www.cs.toronto.edu/~frossard/vgg16/vgg16.py
Are these helpful ?

tf.reshape doesn't work as expected

Actually, I don't know how to describe this question. It's so strange.
import tensorflow as tf
import numpy as np
import pickle
def weight_and_bias(name, shape):
    """Create the variables "W<name>" (full ``shape``) and "B<name>" (size
    ``shape[-1]``), both with a Xavier initializer."""
    make_initializer = tf.contrib.layers.xavier_initializer
    weight = tf.get_variable("W" + name, shape=shape,
                             initializer=make_initializer())
    bias = tf.get_variable("B" + name, shape=shape[-1],
                           initializer=make_initializer())
    return weight, bias
def conv2d_2x2(x, W):
    """SAME-padded 2-D convolution with stride 5 on both spatial axes
    (the stride is 5 despite the function name)."""
    strides = [1, 5, 5, 1]
    return tf.nn.conv2d(x, W, strides=strides, padding='SAME')
def max_pool_2x2(x):
    """SAME-padded 2x2 max-pooling with stride 2."""
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')
# Siamese-style graph from the question: each of the two image sequences goes
# through conv + pool per frame and an LSTM; the two final LSTM outputs are
# concatenated and classified into two classes.
# NOTE(review): indentation was lost in extraction; restore the nesting under
# `with` / `for` / `if` before running.
sess = tf.InteractiveSession()
source = tf.placeholder(tf.float32, [None, None, 50, 50])
source_len = tf.placeholder(tf.int32, [None])
source_max_step = tf.shape(source)[1]
target = tf.placeholder(tf.float32, [None, None, 50, 50])
target_len = tf.placeholder(tf.int32, [None])
target_max_step = tf.shape(target)[1]
W_conv, B_conv = weight_and_bias('conv1', [5, 5, 1, 32])
# NOTE(review): from here on the Python name `source` refers to the output of
# a reshape op, no longer to the placeholder created above.
source = tf.reshape(source, [-1, 50, 50], "source_reshape")
source_tmp = tf.reshape(source, [-1, 50, 50 ,1])
source_conv = tf.nn.relu(conv2d_2x2(source_tmp, W_conv) + B_conv)
source_pool = max_pool_2x2(source_conv)
source_flat = tf.reshape(source_pool, [-1, 5 * 5 * 32], "source_pool_reshape")
source = tf.reshape(source_flat, [-1, source_max_step, 5*5*32], "source_flat_reshape")
W_conv, B_conv = weight_and_bias('conv2', [5, 5, 1, 32])
# NOTE(review): `target` is rebound in the same way as `source`.
target = tf.reshape(target, [-1, 50, 50], "target_reshape")
target_tmp = tf.reshape(target, [-1, 50, 50 ,1])
target_conv = tf.nn.relu(conv2d_2x2(target_tmp, W_conv) + B_conv)
target_pool = max_pool_2x2(target_conv)
target_flat = tf.reshape(target_pool, [-1, 5 * 5 * 32], "target_pool_reshape")
target = tf.reshape(target_flat, [-1, target_max_step, 5*5*32], "target_flat_reshape")
source_cell = tf.nn.rnn_cell.LSTMCell(500, initializer=tf.contrib.layers.xavier_initializer())
target_cell = tf.nn.rnn_cell.LSTMCell(500, initializer=tf.contrib.layers.xavier_initializer())
source_rnn_output, _ = tf.nn.dynamic_rnn(source_cell, source, source_len, dtype=tf.float32, scope = "source")
target_rnn_output, _ = tf.nn.dynamic_rnn(target_cell, target, target_len, dtype=tf.float32, scope = "target")
# Move the time axis to the front so the last time step can be gathered.
source_output = tf.transpose(source_rnn_output, [1, 0, 2])
target_output = tf.transpose(target_rnn_output, [1, 0, 2])
source_final_output = tf.gather(source_output, -1)
target_final_output = tf.gather(target_output, -1)
output = tf.concat(1, [source_final_output, target_final_output])
W_sf, B_sf = weight_and_bias('sf', [1000, 2])
predict = tf.nn.softmax(tf.matmul(output, W_sf) + B_sf)
y = tf.placeholder(tf.float32, [None, 2])
cross_entropy = -tf.reduce_sum(y * tf.log(predict))
train_step = tf.train.RMSPropOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.arg_max(predict, 1), tf.arg_max(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# files you trust.
with open('set', 'rb') as f:
_set = pickle.load(f)
training_set = _set[0]
training_len = _set[1]
training_label = _set[2]
sess.run(tf.global_variables_initializer())
for i in range(20000):
if i % 100 == 0:
# The feed_dict keys `source` / `target` are the rebound reshape outputs.
train_accuacy = accuracy.eval(feed_dict = {source: training_set[0], target: training_set[1], source_len: training_len[0], target_len: training_len[1], y: training_label})
print("step %d, training accuracy %g"%(i, train_accuacy))
train_step.run(feed_dict = {source: training_set[0], target: training_set[1], source_len: training_len[0], target_len: training_len[1], y: training_label})
This is my whole code, and I can't find any problem in it.
But a ValueError: Cannot feed value of shape (1077, 27, 50, 50) for Tensor 'source_flat_reshape:0', which has shape '(?, ?, 800)' was raised.
The error message is strange, because it seems to happen at source = tf.reshape(source_flat, [-1, source_max_step, 5*5*32], "source_flat_reshape"), but how could source_flat have a shape of (1077, 27, 50, 50)? It should be (1077*27, 800).
And, sometimes another ValueError: Cannot feed value of shape (1077, 27, 50, 50) for Tensor 'Reshape:0', which has shape '(?, 50, 50)' was raised.
This is also difficult to understand: why did it happen?
Hope anyone could give me a hand.
Look what happens when you use feed_dict - you reference the variables source and target. However, the python variable no longer refers to the placeholders but rather the reshape ops - hence the op is 'skipped'.
The easiest fix is renaming the placeholders to something unique. Further down in the network it is OK to reuse the same name (you could just call every layer net), it doesn't matter as long as you no longer need to reference them.
Try giving this a go?
import tensorflow as tf
import numpy as np
import pickle
def weight_and_bias(name, shape):
    """Create the variables "W<name>" (full ``shape``) and "B<name>" (size
    ``shape[-1]``), both with a Xavier initializer."""
    make_initializer = tf.contrib.layers.xavier_initializer
    weight = tf.get_variable("W" + name, shape=shape,
                             initializer=make_initializer())
    bias = tf.get_variable("B" + name, shape=shape[-1],
                           initializer=make_initializer())
    return weight, bias
def conv2d_2x2(x, W):
    """SAME-padded 2-D convolution with stride 5 on both spatial axes
    (the stride is 5 despite the function name)."""
    strides = [1, 5, 5, 1]
    return tf.nn.conv2d(x, W, strides=strides, padding='SAME')
def max_pool_2x2(x):
    """SAME-padded 2x2 max-pooling with stride 2."""
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')
sess = tf.InteractiveSession()
# The placeholders get their own names so that rebinding `source` / `target`
# to reshape outputs further down cannot shadow them in feed_dict.
source_placeholder = tf.placeholder(tf.float32, [None, None, 50, 50])
source_len = tf.placeholder(tf.int32, [None])
# Read the dynamic sequence length from the placeholder itself: at this point
# the name `source` has not been defined yet.
source_max_step = tf.shape(source_placeholder)[1]
target_placeholder = tf.placeholder(tf.float32, [None, None, 50, 50])
target_len = tf.placeholder(tf.int32, [None])
target_max_step = tf.shape(target_placeholder)[1]
# Same graph as in the question, but built from source_placeholder /
# target_placeholder, which are also the keys used in feed_dict below.
# NOTE(review): indentation was lost in extraction; restore the nesting under
# `with` / `for` / `if` before running.
W_conv, B_conv = weight_and_bias('conv1', [5, 5, 1, 32])
source = tf.reshape(source_placeholder, [-1, 50, 50], "source_reshape")
source_tmp = tf.reshape(source, [-1, 50, 50 ,1])
source_conv = tf.nn.relu(conv2d_2x2(source_tmp, W_conv) + B_conv)
source_pool = max_pool_2x2(source_conv)
source_flat = tf.reshape(source_pool, [-1, 5 * 5 * 32], "source_pool_reshape")
source = tf.reshape(source_flat, [-1, source_max_step, 5*5*32], "source_flat_reshape")
W_conv, B_conv = weight_and_bias('conv2', [5, 5, 1, 32])
target = tf.reshape(target_placeholder, [-1, 50, 50], "target_reshape")
target_tmp = tf.reshape(target, [-1, 50, 50 ,1])
target_conv = tf.nn.relu(conv2d_2x2(target_tmp, W_conv) + B_conv)
target_pool = max_pool_2x2(target_conv)
target_flat = tf.reshape(target_pool, [-1, 5 * 5 * 32], "target_pool_reshape")
target = tf.reshape(target_flat, [-1, target_max_step, 5*5*32], "target_flat_reshape")
source_cell = tf.nn.rnn_cell.LSTMCell(500, initializer=tf.contrib.layers.xavier_initializer())
target_cell = tf.nn.rnn_cell.LSTMCell(500, initializer=tf.contrib.layers.xavier_initializer())
source_rnn_output, _ = tf.nn.dynamic_rnn(source_cell, source, source_len, dtype=tf.float32, scope = "source")
target_rnn_output, _ = tf.nn.dynamic_rnn(target_cell, target, target_len, dtype=tf.float32, scope = "target")
# Move the time axis to the front so the last time step can be gathered.
source_output = tf.transpose(source_rnn_output, [1, 0, 2])
target_output = tf.transpose(target_rnn_output, [1, 0, 2])
source_final_output = tf.gather(source_output, -1)
target_final_output = tf.gather(target_output, -1)
output = tf.concat(1, [source_final_output, target_final_output])
W_sf, B_sf = weight_and_bias('sf', [1000, 2])
predict = tf.nn.softmax(tf.matmul(output, W_sf) + B_sf)
y = tf.placeholder(tf.float32, [None, 2])
cross_entropy = -tf.reduce_sum(y * tf.log(predict))
train_step = tf.train.RMSPropOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.arg_max(predict, 1), tf.arg_max(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# files you trust.
with open('set', 'rb') as f:
_set = pickle.load(f)
training_set = _set[0]
training_len = _set[1]
training_label = _set[2]
sess.run(tf.global_variables_initializer())
for i in range(20000):
if i % 100 == 0:
train_accuacy = accuracy.eval(feed_dict = {source_placeholder: training_set[0], target_placeholder: training_set[1], source_len: training_len[0], target_len: training_len[1], y: training_label})
print("step %d, training accuracy %g"%(i, train_accuacy))
train_step.run(feed_dict = {source_placeholder: training_set[0], target_placeholder: training_set[1], source_len: training_len[0], target_len: training_len[1], y: training_label})