I'm new to tensorflow. I'm building a 3-layer neural network (just one hidden layer ) using tensorflow and I want to apply a custom activation function to its hidden layer.
I implemented it using np library:
def my_network(input_layer, centers, beta, weights):
    """Radial-basis hidden layer followed by a linear output layer.

    Everything is expressed with TensorFlow ops, so gradients can flow to
    ``centers``, ``beta`` and ``weights['w']`` when they are TF variables.
    NumPy arrays are still accepted; they are converted to tensors.

    Args:
        input_layer: 2-D batch, one sample per row.
        centers: 2-D array/tensor, one centre per row, with the same row
            width as ``input_layer``.
        beta: 1-D array/tensor holding one scale factor per centre.
        weights: mapping whose ``'w'`` entry is the output weight matrix
            with one row per centre.

    Returns:
        Tensor with one row per sample: ``exp(beta * ||x - c||^2)`` for every
        centre, multiplied by ``weights['w']``.
    """
    # The hidden activations are computed in float32, the dtype the original
    # code cast to right before the matmul.
    samples = tf.cast(input_layer, tf.float32)
    centres = tf.cast(centers, tf.float32)
    scales = tf.cast(beta, tf.float32)

    # Broadcasting: diff[c, n, :] = samples[n] - centres[c].
    diff = tf.expand_dims(samples, 0) - tf.expand_dims(centres, 1)
    # Squared Euclidean distance of every sample to every centre: [C, N].
    gaussian = tf.reduce_sum(tf.square(diff), 2)

    # Scale each centre's row by its beta, then apply the activation.
    layer_2 = tf.exp(tf.reshape(scales, [-1, 1]) * gaussian)

    # Transpose to [N, C] so each row is one sample's hidden activation.
    output = tf.matmul(tf.transpose(layer_2), weights['w'])
    return output
I want to convert it into code built from TensorFlow operations, so that TensorFlow can compute gradients through it. How should I do this?
Try this small snippet for multiple convolution layers:
# Three-convolution example graph: 28x28x1 input regressed onto a 14x14x1
# target with a mean-squared-error loss.

# placeholders
X = tf.placeholder(tf.float32, [None, 28, 28, 1], name="input_X")
y = tf.placeholder(tf.float32, [None, 14, 14, 1], name="Output_y")

# C1
with tf.name_scope("layer1"):
    W1 = tf.get_variable("W1", shape=[3, 3, 1, 32],
                         initializer=tf.contrib.layers.xavier_initializer())
    b1 = tf.get_variable("b1", shape=[32],
                         initializer=tf.contrib.layers.xavier_initializer())
    layer1 = tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding='SAME') + b1
    # Swap in any other activation function here.
    layer1_act = tf.nn.relu(layer1)

# C2
with tf.name_scope("layer2"):
    W2 = tf.get_variable("W2", shape=[3, 3, 32, 64],
                         initializer=tf.contrib.layers.xavier_initializer())
    b2 = tf.get_variable("b2", shape=[64],
                         initializer=tf.contrib.layers.xavier_initializer())
    layer2 = tf.nn.conv2d(layer1_act, W2, strides=[1, 1, 1, 1],
                          padding='SAME') + b2
    # Swap in any other activation function here.
    layer2_act = tf.nn.relu(layer2)

# max pool (included to show how pooling is used): halves height and width
with tf.name_scope("maxpool"):
    maxpool = tf.nn.max_pool(layer2_act, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')

# C3
with tf.name_scope("layer3"):
    # The last layer emits a single channel so that its output has the same
    # shape as the target placeholder `y`; the loss below requires that.
    W3 = tf.get_variable("W3", shape=[3, 3, 64, 1],
                         initializer=tf.contrib.layers.xavier_initializer())
    b3 = tf.get_variable("b3", shape=[1],
                         initializer=tf.contrib.layers.xavier_initializer())
    layer3 = tf.nn.conv2d(maxpool, W3, strides=[1, 1, 1, 1],
                          padding='SAME') + b3
    # Swap in any other activation function here.
    layer3_act = tf.nn.relu(layer3)

# loss and train operation
# Scope names may not contain spaces, hence the underscores.
with tf.name_scope('loss_and_train_operation'):
    # The targets stay float32: casting them to an integer type would drop
    # their fractional part before the squared error is taken.
    loss = tf.losses.mean_squared_error(labels=y, predictions=layer3_act)
    optimizer = tf.train.AdamOptimizer(learning_rate=0.00001)
    train_op = optimizer.minimize(loss)
I'm experimenting with CNNs and I'm baffled, because the model I've built actually learns more slowly and performs worse than a fully connected NN. Here are the two models:
fully connected:
# Fully connected baseline: six ReLU hidden layers of shrinking width feeding
# a 2-unit linear output layer.
hidden1 = tf.layers.dense(inputs=X, units=2000,
                          activation=tf.nn.relu, name="hidden1")
hidden2 = tf.layers.dense(inputs=hidden1, units=1000,
                          activation=tf.nn.relu, name="hidden2")
hidden3 = tf.layers.dense(inputs=hidden2, units=1000,
                          activation=tf.nn.relu, name="hidden3")
hidden4 = tf.layers.dense(inputs=hidden3, units=1000,
                          activation=tf.nn.relu, name="hidden4")
hidden5 = tf.layers.dense(inputs=hidden4, units=700,
                          activation=tf.nn.relu, name="hidden5")
hidden6 = tf.layers.dense(inputs=hidden5, units=500,
                          activation=tf.nn.relu, name="hidden6")
# No activation on the last layer: it produces raw class scores.
logits = tf.layers.dense(inputs=hidden6, units=2, name="outputs")
CNN:
# Convolutional variant: two conv + max-pool stages, then a stack of ReLU
# dense layers ending in a 2-unit linear output layer.

# Stage 1: 5x5 filters, 1 -> 10 channels, followed by 2x2 pooling.
f = tf.get_variable('conv1-fil', [5, 5, 1, 10])
conv1 = tf.nn.conv2d(X, filter=f, strides=[1, 1, 1, 1], padding="SAME")
pool1 = tf.nn.max_pool(
    conv1,
    ksize=[1, 2, 2, 1],
    strides=[1, 2, 2, 1],
    padding="VALID",
)

# Stage 2: 3x3 filters, 10 -> 7 channels, followed by 2x2 pooling.
f2 = tf.get_variable('conv2-fil', [3, 3, 10, 7])
conv2 = tf.nn.conv2d(pool1, filter=f2, strides=[1, 1, 1, 1], padding="SAME")
pool2 = tf.nn.max_pool(
    conv2,
    ksize=[1, 2, 2, 1],
    strides=[1, 2, 2, 1],
    padding="VALID",
)

# Flatten the feature maps so the dense layers can consume them.
fc1 = tf.contrib.layers.flatten(pool2)

hidden1 = tf.layers.dense(inputs=fc1, units=3630,
                          activation=tf.nn.relu, name="hidden1")
hidden2 = tf.layers.dense(inputs=hidden1, units=2000,
                          activation=tf.nn.relu, name="hidden2")
hidden3 = tf.layers.dense(inputs=hidden2, units=1000,
                          activation=tf.nn.relu, name="hidden3")
hidden5 = tf.layers.dense(inputs=hidden3, units=700,
                          activation=tf.nn.relu, name="hidden5")
hidden6 = tf.layers.dense(inputs=hidden5, units=500,
                          activation=tf.nn.relu, name="hidden6")
# No activation on the last layer: it produces raw class scores.
logits = tf.layers.dense(inputs=hidden6, units=2, name="outputs")
Basically, the CNN has a slightly shallower fully connected part, but adds convolutional layers in front of it. The CNN reaches ~88% accuracy versus 92% for the deep fully connected network after the same number of epochs on the same dataset. How do I debug issues like that? What are good practices for designing conv layers?
def _conv_relu_pool(index, inputs, in_channels, out_channels):
    """Build conv stage ``index``: 3x3 kernel, bias, ReLU, then pooling.

    Variables are named ``W_conv<index>`` and ``b_conv<index>``.

    Returns:
        Tuple ``(kernel, bias, activation, pooled)`` where ``pooled`` is the
        activation passed through ``max_pool_2x2``.
    """
    kernel = tf.get_variable("W_conv%d" % index,
                             shape=[3, 3, in_channels, out_channels],
                             initializer=xavier())
    bias = tf.get_variable('b_conv%d' % index, [1, 1, 1, out_channels])
    activation = tf.nn.relu(conv2d(inputs, kernel) + bias)
    return kernel, bias, activation, max_pool_2x2(activation)


# Trailing numbers are the author's size annotations for each stage.
x_image = tf.reshape(x, [-1, IMG_SIZE, IMG_SIZE, 3])  # 128

W_conv1, b_conv1, h_conv1, h_pool1 = _conv_relu_pool(1, x_image, 3, 64)  # 64
W_conv2, b_conv2, h_conv2, h_pool2 = _conv_relu_pool(2, h_pool1, 64, 128)  # 32
W_conv3, b_conv3, h_conv3, h_pool3 = _conv_relu_pool(3, h_pool2, 128, 256)  # 16
W_conv4, b_conv4, h_conv4, h_pool4 = _conv_relu_pool(4, h_pool3, 256, 512)  # 8
W_conv5, b_conv5, h_conv5, h_pool5 = _conv_relu_pool(5, h_pool4, 512, 512)  # 4

# Flatten the last pooled feature map for the fully connected layers.
flat_size = 4 * 4 * 512
h_pool5_flat = tf.reshape(h_pool5, [-1, flat_size])

# Hidden fully connected layer with dropout.
W_fc1 = tf.get_variable("W_fc1", shape=[flat_size, 4096],
                        initializer=xavier())
b_fc1 = tf.get_variable('b_fc1', [4096],
                        initializer=init_ops.zeros_initializer())
h_fc1 = tf.nn.relu(tf.matmul(h_pool5_flat, W_fc1) + b_fc1)

keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Two-class output layer.
W_fcO = tf.get_variable("W_fcO", shape=[4096, 2], initializer=xavier())
b_fcO = tf.get_variable('b_fcO', [2],
                        initializer=init_ops.zeros_initializer())
logits = tf.matmul(h_fc1_drop, W_fcO) + b_fcO
y_conv = tf.nn.softmax(logits)

# Training objective and optimizer step.
cross_entropy = loss_ops.softmax_cross_entropy(logits, y_)
train_step = tf.train.AdamOptimizer(0.0005).minimize(cross_entropy)

# Predicted class index and per-class probabilities exposed on the object.
predictions = tf.argmax(y_conv, 1)
self.results = predictions
self.probabilities = y_conv

# Fraction of samples whose predicted class matches the label.
correct_prediction = tf.equal(predictions, tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
I have only 2 classes. My batch was 32K positive and 32K negative images (128x128). I ran for 80 epochs. Testing the neural network on positive / negative images was very promising. I ran another batch of tests on my computer: 500 negative pictures gave an 8% false-positive rate, and 200 positive images gave a 9% false-negative rate.
The problem occurred when I tried checking the probabilities. Softmax returns mainly 1.0 or 0.0 for each class. Shouldn't it be something like 0.745 / 0.255?
Even though it classifies images pretty well, softmax returns results that are too extreme. Where is my mistake?
I am trying to generate an unrecognizable image that can fool VGGNet. I used the following VGG model for TensorFlow, with some modifications for calculating the gradient. In the final part, you can see my modification for calculating the gradient with respect to the given image (is it correct? I am trying to generate an image to which VGGNet assigns a high probability for class 1). With this gradient, I update a random image to fool VGGNet. But this is not very successful: I can't generate an image with a high probability. The maximum probability I got is around 0.001. How can I make it keep increasing?
.
Vggnet model
#
# Davi Frossard, 2016 #
# VGG16 implementation in TensorFlow #
# Details: #
# http://www.cs.toronto.edu/~frossard/post/vgg16/ #
# #
# Model from https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md #
# Weights from Caffe converted using https://github.com/ethereon/caffe-tensorflow #########################################################################################
import tensorflow as tf
import numpy as np
from scipy.misc import imread, imresize
from imagenet_classes import class_names
class vgg16:
    """VGG-16 network graph with an extra image-gradient node.

    Building an instance creates, in this order:

    * thirteen 3x3 convolution layers in five groups, each group followed
      by a 2x2 max-pool (``conv1_1`` ... ``pool5``),
    * three fully connected layers (``fc1``, ``fc2`` and the logits
      ``fc3l``),
    * ``grad``: gradient with respect to ``imgs`` of the mean squared
      difference between the softmax output and a one-hot target for
      class index 0,
    * ``probs``: softmax over ``fc3l``.

    ``parameters`` lists every kernel/bias variable in creation order;
    ``load_weights`` relies on that order.
    """

    def __init__(self, imgs, weights=None, sess=None):
        """Build the graph on ``imgs`` and optionally load weights.

        Args:
            imgs: input image tensor; a 3-channel mean is subtracted from
                it before the first convolution.
            weights: path of a weight archive readable by ``np.load``, or
                None to keep the random initial values.
            sess: session used to run the assignments. Weights are loaded
                only when both ``weights`` and ``sess`` are given.
        """
        self.imgs = imgs
        self.convlayers()
        self.fc_layers()
        self.probs = tf.nn.softmax(self.fc3l, name='prob')
        if weights is not None and sess is not None:
            self.load_weights(weights, sess)

    def _conv_layer(self, name, inputs, in_channels, out_channels):
        """Add one 3x3, stride-1, SAME-padded conv + bias + ReLU layer.

        The kernel and bias are appended to ``self.parameters``.
        """
        with tf.name_scope(name) as scope:
            kernel = tf.Variable(
                tf.truncated_normal([3, 3, in_channels, out_channels],
                                    dtype=tf.float32, stddev=1e-1),
                name='weights')
            conv = tf.nn.conv2d(inputs, kernel, [1, 1, 1, 1], padding='SAME')
            biases = tf.Variable(
                tf.constant(0.0, shape=[out_channels], dtype=tf.float32),
                trainable=True, name='biases')
            out = tf.nn.bias_add(conv, biases)
            activation = tf.nn.relu(out, name=scope)
            self.parameters += [kernel, biases]
        return activation

    def _max_pool(self, name, inputs):
        """Add a 2x2, stride-2, SAME-padded max-pool op called ``name``."""
        return tf.nn.max_pool(inputs,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME',
                              name=name)

    def convlayers(self):
        """Build the preprocessing step and all conv/pool layers."""
        self.parameters = []

        # zero-mean input
        with tf.name_scope('preprocess'):
            mean = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32,
                               shape=[1, 1, 1, 3], name='img_mean')
            images = self.imgs - mean

        # group 1
        self.conv1_1 = self._conv_layer('conv1_1', images, 3, 64)
        self.conv1_2 = self._conv_layer('conv1_2', self.conv1_1, 64, 64)
        self.pool1 = self._max_pool('pool1', self.conv1_2)

        # group 2
        self.conv2_1 = self._conv_layer('conv2_1', self.pool1, 64, 128)
        self.conv2_2 = self._conv_layer('conv2_2', self.conv2_1, 128, 128)
        self.pool2 = self._max_pool('pool2', self.conv2_2)

        # group 3
        self.conv3_1 = self._conv_layer('conv3_1', self.pool2, 128, 256)
        self.conv3_2 = self._conv_layer('conv3_2', self.conv3_1, 256, 256)
        self.conv3_3 = self._conv_layer('conv3_3', self.conv3_2, 256, 256)
        self.pool3 = self._max_pool('pool3', self.conv3_3)

        # group 4
        self.conv4_1 = self._conv_layer('conv4_1', self.pool3, 256, 512)
        self.conv4_2 = self._conv_layer('conv4_2', self.conv4_1, 512, 512)
        self.conv4_3 = self._conv_layer('conv4_3', self.conv4_2, 512, 512)
        self.pool4 = self._max_pool('pool4', self.conv4_3)

        # group 5
        self.conv5_1 = self._conv_layer('conv5_1', self.pool4, 512, 512)
        self.conv5_2 = self._conv_layer('conv5_2', self.conv5_1, 512, 512)
        self.conv5_3 = self._conv_layer('conv5_3', self.conv5_2, 512, 512)
        # This op used to be created with the name 'pool4', which clashed
        # with the real pool4 above.
        self.pool5 = self._max_pool('pool5', self.conv5_3)

    def fc_layers(self):
        """Build the fully connected layers and the image-gradient node."""
        # fc1
        with tf.name_scope('fc1'):
            # Number of features per image after the last pooling layer.
            shape = int(np.prod(self.pool5.get_shape()[1:]))
            fc1w = tf.Variable(tf.truncated_normal([shape, 4096],
                                                   dtype=tf.float32,
                                                   stddev=1e-1),
                               name='weights')
            fc1b = tf.Variable(
                tf.constant(1.0, shape=[4096], dtype=tf.float32),
                trainable=True, name='biases')
            pool5_flat = tf.reshape(self.pool5, [-1, shape])
            fc1l = tf.nn.bias_add(tf.matmul(pool5_flat, fc1w), fc1b)
            self.fc1 = tf.nn.relu(fc1l)
            self.parameters += [fc1w, fc1b]

        # fc2
        with tf.name_scope('fc2'):
            fc2w = tf.Variable(tf.truncated_normal([4096, 4096],
                                                   dtype=tf.float32,
                                                   stddev=1e-1),
                               name='weights')
            fc2b = tf.Variable(
                tf.constant(1.0, shape=[4096], dtype=tf.float32),
                trainable=True, name='biases')
            fc2l = tf.nn.bias_add(tf.matmul(self.fc1, fc2w), fc2b)
            self.fc2 = tf.nn.relu(fc2l)
            self.parameters += [fc2w, fc2b]

        # fc3 (logits, no activation)
        with tf.name_scope('fc3'):
            fc3w = tf.Variable(tf.truncated_normal([4096, 1000],
                                                   dtype=tf.float32,
                                                   stddev=1e-1),
                               name='weights')
            fc3b = tf.Variable(
                tf.constant(1.0, shape=[1000], dtype=tf.float32),
                trainable=True, name='biases')
            self.fc3l = tf.nn.bias_add(tf.matmul(self.fc2, fc3w), fc3b)
            self.parameters += [fc3w, fc3b]

        # Gradient used to push an input image towards class index 0.
        with tf.name_scope('grad'):
            temp = np.zeros(1000)
            temp[0] = 1
            vec = tf.constant(temp, dtype='float32', name='goal')
            # The `-` operator is used instead of tf.sub, which exists only
            # in TensorFlow releases before 1.0.
            loss = tf.reduce_mean(tf.square(tf.nn.softmax(self.fc3l) - vec))
            # tf.gradients returns one entry per listed input.
            self.grad = tf.gradients(loss, self.imgs)[-1]

    def load_weights(self, weight_file, sess):
        """Assign the arrays stored in ``weight_file`` to ``parameters``.

        The archive's keys are sorted, and the i-th key is assigned to
        ``self.parameters[i]``. One line per array (index, key, shape) is
        printed.

        Args:
            weight_file: path or file object accepted by ``np.load``; it
                must be a keyed archive such as ``.npz``.
            sess: session whose ``run`` executes each assignment.
        """
        # The context manager closes the archive once every array is read.
        with np.load(weight_file) as weights:
            for i, k in enumerate(sorted(weights.keys())):
                value = weights[k]  # read each array from the archive once
                # Same output under Python 2 and Python 3.
                print("%s %s %s" % (i, k, np.shape(value)))
                sess.run(self.parameters[i].assign(value))
Create session
#
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Placeholder for a batch of 224x224 RGB images, the session that runs the
# graph, and the VGG-16 network with its weights loaded from disk.
imgs = tf.placeholder(dtype=tf.float32, shape=[None, 224, 224, 3])
sess = tf.Session()
vgg = vgg16(imgs, weights='vgg16_weights.npz', sess=sess)
Generate new image for fooling
#
# Start from uniform random noise in [0, 255) and repeatedly step the image
# against the network's image gradient, recording the first class
# probability before the first step and after every step.
imarray = (np.random.rand(224, 224, 3) * 255).astype('float32')

# `imarray` is only ever updated in place below, so this feed always
# refers to the current image and does not need to be rebuilt.
feed_dict = {vgg.imgs: [imarray]}

prob_list = [sess.run(vgg.probs, feed_dict)[0][0]]

lamda = 0.1  # step size

print('start')
for step in range(1000):
    rst = sess.run(vgg.grad, feed_dict)
    # rst[0] is the gradient for the single image in the batch.
    imarray -= lamda * (rst[0] * 255)
    prob_list.append(sess.run(vgg.probs, feed_dict)[0][0])
I'm surprised that the shapes of the gradient and the image match.
You are taking the derivative of the loss with respect to the parameters; it should be with respect to the image placeholder. Excuse me if I'm missing something obvious; I can't run the code right now.
The computation of the loss is based on fc3l, the final output is probs. I don't see where probs is computed in the VGG code. Maybe there are layers in between. You could plot the first component of fc3l instead, see if that goes up.
You should probably base the loss on probs.
I want to try to build a multi-scale CNN using tensorflow from the cifar10 code.
From what I understand, I should take the output of the first conv layer and merge it with the output of the second conv layer to feed the first fully connected layer. Is that right? If so, how do I actually do this?
I have almost the same first layers as for the cifar10 except for the norm1 and the pool1 layers that are switched
# conv1: 5x5 convolution, 3 -> 64 channels, bias initialised to 0.0
with tf.variable_scope('conv1') as scope:
    kernel = _variable_with_weight_decay(
        'weights', shape=[5, 5, 3, 64], stddev=1e-4, wd=0.0)
    conv = tf.nn.conv2d(images, kernel, strides=[1, 1, 1, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
    conv1 = tf.nn.relu(tf.nn.bias_add(conv, biases), name=scope.name)
    _activation_summary(conv1)

# norm1: local response normalisation applied directly to conv1
norm1 = tf.nn.lrn(conv1, depth_radius=4, bias=1.0, alpha=0.001 / 9.0,
                  beta=0.75, name='norm1')

# pool1: 3x3 max-pool with stride 2 over the normalised activations
pool1 = tf.nn.max_pool(norm1,
                       ksize=[1, 3, 3, 1],
                       strides=[1, 2, 2, 1],
                       padding='SAME',
                       name='pool1')

# conv2: 5x5 convolution, 64 -> 64 channels, bias initialised to 0.1
# NOTE(review): conv2 reads norm1 rather than pool1 - confirm this is the
# intended wiring.
with tf.variable_scope('conv2') as scope:
    kernel = _variable_with_weight_decay(
        'weights', shape=[5, 5, 64, 64], stddev=1e-4, wd=0.0)
    conv = tf.nn.conv2d(norm1, kernel, strides=[1, 1, 1, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
    conv2 = tf.nn.relu(tf.nn.bias_add(conv, biases), name=scope.name)
    _activation_summary(conv2)

# norm2
norm2 = tf.nn.lrn(conv2, depth_radius=4, bias=1.0, alpha=0.001 / 9.0,
                  beta=0.75, name='norm2')

# pool2
pool2 = tf.nn.max_pool(norm2,
                       ksize=[1, 3, 3, 1],
                       strides=[1, 2, 2, 1],
                       padding='SAME',
                       name='pool2')
Then I try to merge the norm1 layer with the pool2 layer using concat.
Here's how I do this
# local3: fully connected layer fed by both pooled feature maps
with tf.variable_scope('local3') as scope:
    # Join pool1 and pool2 along axis 2.
    concat = tf.concat(2, [pool1, pool2])

    # Collapse every non-batch axis into one so that a single matrix
    # multiply can consume the features.
    feature_dims = concat.get_shape()[1:].as_list()
    dim = 1
    for size in feature_dims:
        dim *= size
    reshape = tf.reshape(concat, [FLAGS.batch_size, dim])

    weights = _variable_with_weight_decay(
        'weights', shape=[dim, 384], stddev=0.04, wd=0.004)
    biases = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1))
    local3 = tf.nn.relu_layer(reshape, weights, biases, name=scope.name)
    _activation_summary(local3)
I'm not even sure that this is the right procedure, because the loss now starts at about 17, whereas in the single-scale case the initial loss was around 3.
Is this common?
Thanks in advance.