Related
1: When attempting to perform a PyTorch training loop with batches, my loss function appears to error when the network output and a target batch are put through an MSELoss function.
2: I have tried searching about nn padding; however, this is not a convnet but rather an autoencoder, and similar Stack Overflow questions have not yielded results.
3: The NN:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm


class Net(nn.Module):
    def __init__(self, input_dim=10):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, int(0.75 * input_dim))
        self.fc2 = nn.Linear(int(0.75 * input_dim), int(0.5 * input_dim))
        self.fc3 = nn.Linear(int(0.5 * input_dim), int(0.33 * input_dim))
        self.fc4 = nn.Linear(int(0.33 * input_dim), int(0.25 * input_dim))
        self.fc5 = nn.Linear(int(0.25 * input_dim), int(0.33 * input_dim))
        self.fc6 = nn.Linear(int(0.33 * input_dim), int(0.5 * input_dim))
        self.fc7 = nn.Linear(int(0.5 * input_dim), int(0.75 * input_dim))
        self.fc8 = nn.Linear(int(0.75 * input_dim), input_dim)

    def forward(self, x):
        x = torch.tanh(self.fc1(x))
        x = torch.tanh(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        x = torch.tanh(self.fc4(x))
        x = torch.tanh(self.fc5(x))
        x = torch.tanh(self.fc6(x))
        x = torch.tanh(self.fc7(x))
        x = self.fc8(x)
        return torch.softmax(x, dim=1)
the train method:
def train(net, x_train, x_opt, BATCH_SIZE, EPOCHS, input_dim):
    outputs = 0
    mse = 0
    optimizer = optim.SGD(net.parameters(), lr=0.001)
    loss_function = nn.MSELoss()

    for epoch in range(EPOCHS):
        for i in tqdm(range(0, len(x_train), BATCH_SIZE)):
            batch_x = x_train[i:i + BATCH_SIZE]
            # print("bx", batch_x.size())
            batch_y = x_opt[i:i + BATCH_SIZE]
            # print("by", batch_y.size())
            net.zero_grad()
            # batch_x.view(batch_y.shape[0])
            outputs = net(batch_x)
            # print('out', outputs)
            loss = loss_function(outputs, batch_y)
            loss.backward()
            optimizer.step()  # Does the update
        print(f"Epoch: {epoch}. Loss: {loss}")
error:
99%|█████████▉| 1452/1466 [00:02<00:00, 718.09it/s]B:\tools and software\Anaconda\envs\pysyft-pytorch\lib\site-packages\torch\nn\modules\loss.py:431: UserWarning: Using a target size (torch.Size([39, 10])) that is different to the input size (torch.Size([38, 10])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
return F.mse_loss(input, target, reduction=self.reduction)
100%|█████████▉| 1465/1466 [00:02<00:00, 718.36it/s]
Traceback (most recent call last):
File "<input>", line 1, in <module>
File "B:\tools and software\PyCharm 2020.1\plugins\python\helpers\pydev\_pydev_bundle\pydev_umd.py", line 197, in runfile
pydev_imports.execfile(filename, global_vars, local_vars) # execute the script
File "B:\tools and software\PyCharm 2020.1\plugins\python\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "B:/projects/openProjects/githubprojects/BotnetTrafficAnalysisFederaedLearning/anomaly-detection/pytorch_conversion.py", line 154, in <module>
input_dim=input_dim)
File "B:/projects/openProjects/githubprojects/BotnetTrafficAnalysisFederaedLearning/anomaly-detection/pytorch_conversion.py", line 64, in train
loss = loss_function(outputs, batch_y)
File "B:\tools and software\Anaconda\envs\pysyft-pytorch\lib\site-packages\torch\nn\modules\module.py", line 532, in __call__
result = self.forward(*input, **kwargs)
File "B:\tools and software\Anaconda\envs\pysyft-pytorch\lib\site-packages\torch\nn\modules\loss.py", line 431, in forward
return F.mse_loss(input, target, reduction=self.reduction)
File "B:\tools and software\Anaconda\envs\pysyft-pytorch\lib\site-packages\torch\nn\functional.py", line 2215, in mse_loss
expanded_input, expanded_target = torch.broadcast_tensors(input, target)
File "B:\tools and software\Anaconda\envs\pysyft-pytorch\lib\site-packages\torch\functional.py", line 52, in broadcast_tensors
return torch._C._VariableFunctions.broadcast_tensors(tensors)
RuntimeError: The size of tensor a (38) must match the size of tensor b (39) at non-singleton dimension 0
The error seems to say that the batch sizes of the target and the output are not the same. Have you tried printing the size of the target and the output? If so, what are the results? You might also want to print the size of the input to the model to see if something is off there. Sorry for posting this as an answer; I can't comment yet.
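For illustration, a minimal sketch of that diagnostic inside the posted train() loop (the length check and the print calls are additions, not part of the original code); if x_train and x_opt have different lengths, the final slices end up with different numbers of rows, which would match the 38-vs-39 warning above:

# hypothetical diagnostic additions to the train() loop from the question
assert len(x_train) == len(x_opt), (len(x_train), len(x_opt))  # inputs and targets must line up

for i in tqdm(range(0, len(x_train), BATCH_SIZE)):
    batch_x = x_train[i:i + BATCH_SIZE]
    batch_y = x_opt[i:i + BATCH_SIZE]
    print("batch_x:", batch_x.size(), "batch_y:", batch_y.size())
    outputs = net(batch_x)
    print("outputs:", outputs.size())  # must equal batch_y.size() before calling MSELoss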
I have been getting this error and I can't figure out the reason. If anyone could help, that would be great.
This is my code:
import numpy as np
import pickle
import os
import download
#from dataset import one_hot_encoded
#from sklearn.preprocessing import OneHotEncoder
import tensorflow as tf
from random import shuffle
data_path = "D:/Personal details/Internship/"
# Width and height of each image.
img_size = 32
# Number of channels in each image, 3 channels: Red, Green, Blue.
num_channels = 3
# Length of an image when flattened to a 1-dim array.
img_size_flat = img_size * img_size * num_channels
# Number of classes.
num_classes = 10
# Number of files for the training-set.
_num_files_train = 5
# Number of images for each batch-file in the training-set.
_images_per_file = 10000
# Total number of images in the training-set.
_num_images_train = _num_files_train * _images_per_file
def _get_file_path(filename=""):
    return os.path.join(data_path, "cifar-10-batches-py/", filename)


def _unpickle(filename):
    file_path = _get_file_path(filename)
    print("Loading data: " + file_path)
    with open(file_path, mode='rb') as file:
        # In Python 3.X it is important to set the encoding,
        # otherwise an exception is raised here.
        data = pickle.load(file, encoding='bytes')
    return data
def _convert_images(raw):
    # Convert the raw images from the data-files to floating-points.
    raw_float = np.array(raw, dtype=float) / 255.0
    # Reshape the array to 4-dimensions.
    images = raw_float.reshape([-1, num_channels, img_size, img_size])
    # Reorder the indices of the array.
    images = images.transpose([0, 2, 3, 1])
    return images
def _load_data(filename):
    # Load the pickled data-file.
    data = _unpickle(filename)
    # Get the raw images.
    raw_images = data[b'data']
    # Get the class-numbers for each image. Convert to numpy-array.
    cls = np.array(data[b'labels'])
    # Convert the images.
    images = _convert_images(raw_images)
    return images, cls
def load_class_names():
    # Load the class-names from the pickled file.
    raw = _unpickle(filename="batches.meta")[b'label_names']
    # Convert from binary strings.
    names = [x.decode('utf-8') for x in raw]
    return names
def load_training_data():
    images = np.zeros(shape=[_num_images_train, img_size, img_size, num_channels], dtype=float)
    cls = np.zeros(shape=[_num_images_train], dtype=int)
    # Begin-index for the current batch.
    begin = 0
    # For each data-file.
    for i in range(_num_files_train):
        # Load the images and class-numbers from the data-file.
        images_batch, cls_batch = _load_data(filename="data_batch_" + str(i + 1))
        # Number of images in this batch.
        num_images = len(images_batch)
        # End-index for the current batch.
        end = begin + num_images
        # Store the images into the array.
        images[begin:end, :] = images_batch
        # Store the class-numbers into the array.
        cls[begin:end] = cls_batch
        # The begin-index for the next batch is the current end-index.
        begin = end
    return images, cls, one_hot_encoded(class_numbers=cls, num_classes=num_classes)
def load_test_data():
    images, cls = _load_data(filename="test_batch")
    return images, cls, one_hot_encoded(class_numbers=cls, num_classes=num_classes)
########################################################################
def one_hot_encoded(class_numbers, num_classes=None):
    if num_classes is None:
        num_classes = np.max(class_numbers) + 1
    return np.eye(num_classes, dtype=float)[class_numbers]
class_names = load_class_names()
images_train, cls_train, labels_train = load_training_data()
images_test, cls_test, labels_test = load_test_data()
images_train_train = images_train[0:45000]
validation_train = images_train[45000:50000]
labels_train_train = labels_train[0:45000]
validation_labels = labels_train[45000:]
print(len(images_train_train))
print(len(validation_train))
##print(class_names)
##print(len(images_train))
##print(cls_train)
##print(labels_train)
##print(cls_test)
##print(labels_test)
n_classes = len(class_names)
batch_size = 128
x = tf.placeholder(tf.float32, shape=[None, 32, 32, 3], name='x')
y = tf.placeholder(tf.float32, shape=[None, n_classes], name='y_true')
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


def maxpool2d(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
def convolutional_neural_network(x):
    weights = {'W_conv1': tf.Variable(tf.random_normal([3, 3, 3, 64])),
               'W_conv2': tf.Variable(tf.random_normal([3, 3, 64, 128])),
               'W_conv3': tf.Variable(tf.random_normal([3, 3, 128, 256])),
               'W_conv4': tf.Variable(tf.random_normal([3, 3, 256, 256])),
               'W_fc1': tf.Variable(tf.random_normal([256, 1024])),
               'W_fc2': tf.Variable(tf.random_normal([1024, 1024])),
               'soft_max': tf.Variable(tf.random_normal([1024, n_classes]))}

    biases = {'b_conv1': tf.Variable(tf.random_normal([64])),
              'b_conv2': tf.Variable(tf.random_normal([128])),
              'b_conv3': tf.Variable(tf.random_normal([256])),
              'b_conv4': tf.Variable(tf.random_normal([256])),
              'b_fc1': tf.Variable(tf.random_normal([1024])),
              'b_fc2': tf.Variable(tf.random_normal([1024])),
              'soft_max': tf.Variable(tf.random_normal([n_classes]))}

    conv1 = tf.nn.relu(conv2d(x, weights['W_conv1']) + biases['b_conv1'])
    conv1 = maxpool2d(conv1)
    conv2 = tf.nn.relu(conv2d(conv1, weights['W_conv2']) + biases['b_conv2'])
    conv2 = maxpool2d(conv2)
    conv3 = tf.nn.relu(conv2d(conv2, weights['W_conv3']) + biases['b_conv3'])
    conv4 = tf.nn.relu(conv2d(conv3, weights['W_conv4']) + biases['b_conv4'])
    conv4 = maxpool2d(conv4)
    fc1 = tf.reshape(conv4, [256, -1])
    fc1 = tf.nn.relu(tf.matmul(fc1, weights['W_fc1']) + biases['b_fc1'])
    fc2 = tf.nn.relu(tf.matmul(fc1, weights['W_fc2'] + biases['b_fc2']))
    soft_max = tf.matmul(fc2, weights['soft_max']) + biases['soft_max']
    return soft_max
def train_neural_network(x):
    prediction = convolutional_neural_network(x)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    hm_epochs = 3
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        for epoch in range(hm_epochs):
            epoch_loss = 0
            i = 0
            while i < len(images_train_train):
                start = i
                end = i + batch_size
                batch_x = np.array(images_train_train[start:end])
                batch_y = np.array(labels_train_train[start:end])
                _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
                epoch_loss += c
                i += batch_size  # move on to the next batch
            print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:', epoch_loss)
        correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        print('Accuracy:', accuracy.eval({x: validation_train, y: validation_labels}))
train_neural_network(x)
And this is the error I have been getting.
WARNING:tensorflow:From D:/Personal details/Internship/cifar-10v1.0.py:310: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.
See #{tf.nn.softmax_cross_entropy_with_logits_v2}.
WARNING:tensorflow:From C:\Python35\lib\site-packages\tensorflow\python\util\tf_should_use.py:118: initialize_all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Use `tf.global_variables_initializer` instead.
Traceback (most recent call last):
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1322, in _do_call
return fn(*args)
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1307, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1409, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Matrix size-incompatible: In[0]: [256,2048], In[1]: [256,1024]
[[Node: MatMul = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Reshape, Variable_4/read)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "D:/Personal details/Internship/cifar-10v1.0.py", line 344, in <module>
train_neural_network(x)
File "D:/Personal details/Internship/cifar-10v1.0.py", line 327, in train_neural_network
_, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 900, in run
run_metadata_ptr)
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1135, in _run
feed_dict_tensor, options, run_metadata)
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1316, in _do_run
run_metadata)
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1335, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Matrix size-incompatible: In[0]: [256,2048], In[1]: [256,1024]
[[Node: MatMul = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Reshape, Variable_4/read)]]
Caused by op 'MatMul', defined at:
File "<string>", line 1, in <module>
File "C:\Python35\lib\idlelib\run.py", line 130, in main
ret = method(*args, **kwargs)
File "C:\Python35\lib\idlelib\run.py", line 357, in runcode
exec(code, self.locals)
File "D:/Personal details/Internship/cifar-10v1.0.py", line 344, in <module>
train_neural_network(x)
File "D:/Personal details/Internship/cifar-10v1.0.py", line 309, in train_neural_network
prediction = convolutional_neural_network(x)
File "D:/Personal details/Internship/cifar-10v1.0.py", line 300, in convolutional_neural_network
fc1 = tf.nn.relu(tf.matmul(fc1, weights['W_fc1']) + biases['b_fc1'])
File "C:\Python35\lib\site-packages\tensorflow\python\ops\math_ops.py", line 2122, in matmul
a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
File "C:\Python35\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 4567, in mat_mul
name=name)
File "C:\Python35\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "C:\Python35\lib\site-packages\tensorflow\python\framework\ops.py", line 3392, in create_op
op_def=op_def)
File "C:\Python35\lib\site-packages\tensorflow\python\framework\ops.py", line 1718, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): Matrix size-incompatible: In[0]: [256,2048], In[1]: [256,1024]
[[Node: MatMul = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Reshape, Variable_4/read)]]
It looks like the problem is in the convolutional_neural_network() function, where the matrix dimensions do not line up for the multiplication, but it is not clear how to solve the issue.
Thank you for the help in advance.
After reshaping conv4 at the line fc1 = tf.reshape(conv4, [256, -1]), the shape of fc1 is (256, 2048), while the weight matrix W_fc1 has shape (256, 1024). Thus you get a size-incompatibility error in the matrix multiplication at the next line, fc1 = tf.nn.relu(tf.matmul(fc1, weights['W_fc1']) + biases['b_fc1']). I suggest you go through the dimensions at every step manually to catch such errors in the future.
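For illustration, a sketch of one possible fix (not part of the original answer): assuming the 32x32x3 input and the three 2x2 max-pools above, conv4 is 4x4x256 per example, so the flatten should keep the batch dimension with -1 and W_fc1 should take 4*4*256 = 4096 input features rather than 256:

# inside convolutional_neural_network(x): size the fc weight by feature count, not batch size
weights['W_fc1'] = tf.Variable(tf.random_normal([4 * 4 * 256, 1024]))  # 4096 -> 1024

# flatten each example instead of hard-coding 256 rows
fc1 = tf.reshape(conv4, [-1, 4 * 4 * 256])  # shape: [batch_size, 4096]
fc1 = tf.nn.relu(tf.matmul(fc1, weights['W_fc1']) + biases['b_fc1'])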
I'm getting an incompatible-shape error when trying to add a CNN to a ready-made Siamese implementation that I got from GitHub. Here is the link:
https://github.com/ywpkwon/siamese_tf_mnist
Here is the code for running the session:
""" Siamese implementation using Tensorflow with MNIST example.
This siamese network embeds a 28x28 image (a point in 784D)
into a point in 2D.
By Youngwook Paul Kwon (young at berkeley.edu)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
#import system things
from tensorflow.examples.tutorials.mnist import input_data # for data
import tensorflow as tf
import numpy as np
import os
#import helpers
import inference
import visualize
# prepare data and tf.session
mnist = input_data.read_data_sets('MNIST_data', one_hot=False)
sess = tf.InteractiveSession()
# setup siamese network
siamese = inference.siamese();
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(siamese.loss)
saver = tf.train.Saver()
tf.initialize_all_variables().run()
# start training
if new:
    for step in range(1000):
        batch_x1, batch_y1 = mnist.train.next_batch(128)
        batch_x2, batch_y2 = mnist.train.next_batch(128)
        batch_y = (batch_y1 == batch_y2).astype('float')

        _, loss_v = sess.run([train_step, siamese.loss], feed_dict={
            siamese.x1: batch_x1,
            siamese.x2: batch_x2,
            siamese.y_: batch_y})

        if step % 10 == 0:
            print('step %d: loss' % (step))
            print(loss_v)
Here is the code for creating the Siamese model:
import tensorflow as tf
class siamese:

    # Create model
    def __init__(self):
        self.x1 = tf.placeholder(tf.float32, [None, 784])
        self.x2 = tf.placeholder(tf.float32, [None, 784])

        with tf.variable_scope("siamese") as scope:
            self.o1 = self.network(self.x1)
            scope.reuse_variables()
            self.o2 = self.network(self.x2)

        # Create loss
        self.y_ = tf.placeholder(tf.float32, [None])
        self.loss = self.loss_with_step()

    def network(self, x):
        weights = []
        fc1 = self.fc_layer(x, 1024, "fc1", [5, 5, 1, 32])
        return fc1

    def fc_layer(self, bottom, n_weight, name, kernel_shape):  # [5, 5, 1, 32]
        assert len(bottom.get_shape()) == 2
        # n_prev_weight = bottom.get_shape()[1]
        initer = tf.truncated_normal_initializer(stddev=0.01)
        weights_for_convolution = tf.get_variable(name + "weights_for_convolution", kernel_shape,
                                                  initializer=tf.random_normal_initializer())
        bias_shape = kernel_shape[-1]
        biases_for_convolution = tf.get_variable(name + "biases_for_convolution", [bias_shape],
                                                 initializer=tf.constant_initializer(0.1))
        biases_for_connected_layer = tf.get_variable(name + "biases_for_connected_layer", [1024],
                                                     initializer=tf.constant_initializer(0.1))
        weights_for_connected_layer = tf.get_variable(name + "weights_for_connected_layer", [7*7*64, 1024],
                                                      initializer=tf.random_normal_initializer())
        W = tf.get_variable(name + 'W', dtype=tf.float32, shape=[1024, 2], initializer=initer)
        b = tf.get_variable(name + 'b', dtype=tf.float32, initializer=tf.constant(0.01, shape=[2], dtype=tf.float32))
        # weights_for_readout_layer = tf.get_variable("weights_for_readout_layer", [1024, 2],
        #                                             initializer=tf.random_normal_initializer())
        # biases_for_readout_layer = tf.get_variable("biases_for_readout_layer", [2],
        #                                            initializer=tf.constant_initializer(0.1))

        bottom1 = tf.reshape(bottom, [-1, 28, 28, 1])  ##
        c2 = tf.nn.conv2d(bottom1, weights_for_convolution, strides=[1, 1, 1, 1], padding='SAME')
        conv = tf.nn.bias_add(c2, biases_for_convolution)
        relu = tf.nn.relu(conv)
        out = tf.nn.max_pool(relu, ksize=[1, 2, 2, 1],
                             strides=[1, 2, 2, 1], padding='SAME')
        # print tf.shape(out)
        h_out_flat = tf.reshape(out, [-1, 7*7*64])
        h_fc1 = tf.nn.relu(tf.matmul(h_out_flat, weights_for_connected_layer) + biases_for_connected_layer)
        # compute model output
        final_output = tf.matmul(h_fc1, W) + b
        # fc = tf.nn.bias_add(tf.matmul(bottom, W), b)
        return final_output

    def loss_with_spring(self):
        margin = 5.0
        labels_t = self.y_
        labels_f = tf.subtract(1.0, self.y_, name="1-yi")  # labels_ = !labels;
        eucd2 = tf.pow(tf.subtract(self.o1, self.o2), 2)
        print tf.shape(eucd2)
        eucd2 = tf.reduce_sum(eucd2, 1)
        eucd = tf.sqrt(eucd2 + 1e-6, name="eucd")
        C = tf.constant(margin, name="C")
        # yi*||CNN(p1i)-CNN(p2i)||^2 + (1-yi)*max(0, C-||CNN(p1i)-CNN(p2i)||^2)
        pos = tf.multiply(labels_t, eucd2, name="yi_x_eucd2")
        # neg = tf.multiply(labels_f, tf.subtract(0.0, eucd2), name="yi_x_eucd2")
        # neg = tf.multiply(labels_f, tf.maximum(0.0, tf.subtract(C, eucd2)), name="Nyi_x_C-eucd_xx_2")
        neg = tf.multiply(labels_f, tf.pow(tf.maximum(tf.subtract(C, eucd), 0), 2), name="Nyi_x_C-eucd_xx_2")
        losses = tf.add(pos, neg, name="losses")
        loss = tf.reduce_mean(losses, name="loss")
        return loss

    def loss_with_step(self):
        margin = 5.0
        labels_t = self.y_  # 128
        labels_f = tf.subtract(1.0, self.y_, name="1-yi")  # labels_ = !labels;
        eucd2 = tf.pow(tf.subtract(self.o1, self.o2), 2)
        eucd2 = tf.reduce_sum(eucd2, 1)
        eucd = tf.sqrt(eucd2 + 1e-6, name="eucd")
        C = tf.constant(margin, name="C")
        pos = tf.multiply(labels_t, eucd, name="y_x_eucd")
        neg = tf.multiply(labels_f, tf.maximum(0.0, tf.subtract(C, eucd)), name="Ny_C-eucd")
        losses = tf.add(pos, neg, name="losses")
        loss = tf.reduce_mean(losses, name="loss")
        return loss
Actually, as the batch size is 128, labels_t is of size 128. The problem is that the Euclidean distance in the loss_with_step function, as well as in the loss_with_spring function, is of size 256 and not 128, and I don't really know why.
Here is the error I get:
Traceback (most recent call last):
File "run1.py", line 56, in <module>
siamese.y_: batch_y})
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site- packages/tensorflow/python/client/session.py", line 789, in run
run_metadata_ptr)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site- packages/tensorflow/python/client/session.py", line 997, in _run
feed_dict_string, options, run_metadata)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site- packages/tensorflow/python/client/session.py", line 1132, in _do_run
target_list, options, run_metadata)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site- packages/tensorflow/python/client/session.py", line 1152, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [128] vs. [256]
[[Node: y_x_eucd = Mul[T=DT_FLOAT, _device="/job:localhost/ replica:0/task:0/cpu:0"](_arg_Placeholder_2_0_2, eucd)]]
Caused by op u'y_x_eucd', defined at:
File "run1.py", line 28, in <module>
siamese = inference1.siamese();
File "/home/sudonuma/Documents/siamese for mnist/siamese_tf_mnist-master /inference1.py", line 18, in __init__
self.loss = self.loss_with_step()
File "/home/sudonuma/Documents/siamese for mnist/siamese_tf_mnist-master /inference1.py", line 110, in loss_with_step
pos = tf.multiply(labels_t, eucd, name="y_x_eucd")
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site- packages/tensorflow/python/ops/math_ops.py", line 286, in multiply
return gen_math_ops._mul(x, y, name)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site- packages/tensorflow/python/ops/gen_math_ops.py", line 1377, in _mul
result = _op_def_lib.apply_op("Mul", x=x, y=y, name=name)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site- packages/tensorflow/python/framework/op_def_library.py", line 767, in apply
_op
op_def=op_def)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site- packages/tensorflow/python/framework/ops.py", line 2506, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site- packages/tensorflow/python/framework/ops.py", line 1269, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): Incompatible shapes: [128] vs. [256]
[[Node: y_x_eucd = Mul[T=DT_FLOAT, _device="/job:localhost /replica:0/task:0/cpu:0"](_arg_Placeholder_2_0_2, eucd)]]
Can anyone help?
Looks like your reshaping after the convolution is wrong. The output would be 14x14x32 for a 28x28x1 input passed through conv (stride 1) followed by maxpool (stride 2). So you need to change the flatten layer to:
h_out_flat = tf.reshape(out, [-1, 14*14*32])
and also size weights_for_connected_layer appropriately.
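For example, a sketch of those two changes inside fc_layer, keeping the variable names from the question (one 5x5 conv with SAME padding plus a single 2x2 max-pool turns a 28x28x1 image into a 14x14x32 map, i.e. 14*14*32 = 6272 features per example):

weights_for_connected_layer = tf.get_variable(name + "weights_for_connected_layer",
                                              [14 * 14 * 32, 1024],
                                              initializer=tf.random_normal_initializer())
# ... conv / bias_add / relu / max_pool exactly as in the question ...
# -1 keeps the batch dimension at 128 instead of silently doubling it to 256
h_out_flat = tf.reshape(out, [-1, 14 * 14 * 32])
h_fc1 = tf.nn.relu(tf.matmul(h_out_flat, weights_for_connected_layer) + biases_for_connected_layer)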
I have 70 training samples and 10 testing samples, where every sample contains 11*99 elements. I want to use an LSTM to classify the testing samples; here is the code:
import tensorflow as tf
import scipy.io as sc
# data read
feature_training = sc.loadmat("feature_training_reshaped.mat")
feature_training_reshaped = feature_training['feature_training_reshaped']
print (feature_training_reshaped.shape)
feature_testing = sc.loadmat("feature_testing_reshaped.mat")
feature_testing_reshaped = feature_testing['feature_testing_reshaped']
print (feature_testing_reshaped.shape)
label_training = sc.loadmat("label_training.mat")
label_training = label_training['aa']
print (label_training.shape)
label_testing = sc.loadmat("label_testing.mat")
label_testing = label_testing['label_testing']
print (label_testing.shape)
a=feature_training_reshaped.reshape([70, 11, 99])
b=feature_testing_reshaped.reshape([10, 11, 99])
print (a.shape)
# hyperparameters
lr = 0.001
training_iters = 1000
batch_size = 70
n_inputs = 99 # MNIST data input (img shape: 11*99)
n_steps = 11 # time steps
n_hidden_units = 128 # neurons in hidden layer
n_classes = 2 # MNIST classes (0-9 digits)
# tf Graph input
x = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_classes])
# Define weights
weights = {
# (28, 128)
'in': tf.Variable(tf.random_normal([n_inputs, n_hidden_units])),
# (128, 10)
'out': tf.Variable(tf.random_normal([n_hidden_units, n_classes]))
}
biases = {
# (128, )
'in': tf.Variable(tf.constant(0.1, shape=[n_hidden_units, ])),
# (10, )
'out': tf.Variable(tf.constant(0.1, shape=[n_classes, ]))
}
def RNN(X, weights, biases):
    # hidden layer for input to cell
    ########################################
    # all the data in this batch flow into this layer in one time
    # transpose the inputs shape from 70batch, 11steps, 99inputs
    # X ==> (70 batch * 11 steps, 99 inputs)
    X = tf.reshape(X, [-1, n_inputs])
    # into hidden
    # X_in = (70 batch * 11 steps, 99 inputs)
    X_in = tf.matmul(X, weights['in']) + biases['in']
    # another shape transpose X_in ==> (70 batch, 11 steps, 128 hidden),
    X_in = tf.reshape(X_in, [-1, n_steps, n_hidden_units])

    # cell
    ##########################################
    # basic LSTM Cell.
    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden_units, forget_bias=1.0, state_is_tuple=True)
    # lstm cell is divided into two parts (c_state, h_state)
    ##### TAKE Care, batch_size should be 10 when the testing dataset only has 10 data
    _init_state = lstm_cell.zero_state(batch_size, dtype=tf.float32)
    print("_init_state:", _init_state)

    # You have 2 options for following step.
    # 1: tf.nn.rnn(cell, inputs);
    # 2: tf.nn.dynamic_rnn(cell, inputs).
    # If use option 1, you have to modified the shape of X_in, go and check out this:
    # https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/recurrent_network.py
    # In here, we go for option 2.
    # dynamic_rnn receive Tensor (batch, steps, inputs) or (steps, batch, inputs) as X_in.
    # Make sure the time_major is changed accordingly.
    outputs, final_state = tf.nn.dynamic_rnn(lstm_cell, X_in, initial_state=_init_state, time_major=False)
    # outputs size would be a tensor [70,11,128]; size of X_in is (70 batch, 11 steps, 128 hidden)
    # final_state size would be [batch_size, outputs], which is [70,128]
    print(outputs)
    print(final_state)

    # hidden layer for output as the final results
    #############################################
    results = tf.matmul(final_state[1], weights['out']) + biases['out']
    # # or
    # unpack to list [(batch, outputs)..] * steps
    # outputs = tf.unpack(tf.transpose(outputs, [1, 0, 2]))  # states is the last outputs
    # results = tf.matmul(outputs[-1], weights['out']) + biases['out']
    return results
pred = RNN(x, weights, biases)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
train_op = tf.train.AdamOptimizer(lr).minimize(cost)
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
init = tf.initialize_all_variables()
with tf.Session() as sess:
    sess.run(init)
    step = 0
    while step * batch_size < training_iters:
        # batch_xs, batch_ys = fea.next_batch(batch_size)
        # batch_xs = batch_xs.reshape([batch_size, n_steps, n_inputs])
        sess.run([train_op], feed_dict={
            x: a,
            y: label_training,
        })
        if step % 10 == 0:
            print(sess.run(accuracy, feed_dict={
                x: b,
                y: label_testing,
            }))
        step += 1
At last, I got the following result and error:
(770, 99)
(110, 99)
(70, 2)
(10, 2)
(70, 11, 99)
('_init_state:', LSTMStateTuple(c=<tf.Tensor 'zeros:0' shape=(70, 128) dtype=float32>, h=<tf.Tensor 'zeros_1:0' shape=(70, 128) dtype=float32>))
Tensor("RNN/transpose:0", shape=(70, 11, 128), dtype=float32)
LSTMStateTuple(c=<tf.Tensor 'RNN/while/Exit_2:0' shape=(70, 128) dtype=float32>, h=<tf.Tensor 'RNN/while/Exit_3:0' shape=(70, 128) dtype=float32>)
Traceback (most recent call last):
File "/home/xiangzhang/RNN.py", line 150, in <module>
y: label_testing,
File "/home/xiangzhang/.local/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 717, in run
run_metadata_ptr)
File "/home/xiangzhang/.local/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 915, in _run
feed_dict_string, options, run_metadata)
File "/home/xiangzhang/.local/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 965, in _do_run
target_list, options, run_metadata)
File "/home/xiangzhang/.local/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 985, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors.InvalidArgumentError: ConcatOp : Dimensions of inputs should match: shape[0] = [10,128] vs. shape[1] = [70,128]
[[Node: RNN/while/BasicLSTMCell/Linear/concat = Concat[N=2, T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](RNN/while/BasicLSTMCell/Linear/concat/concat_dim, RNN/while/TensorArrayRead, RNN/while/Identity_3)]]
Caused by op u'RNN/while/BasicLSTMCell/Linear/concat', defined at:
File "/home/xiangzhang/RNN.py", line 128, in <module>
pred = RNN(x, weights, biases)
File "/home/xiangzhang/RNN.py", line 110, in RNN
outputs, final_state = tf.nn.dynamic_rnn(lstm_cell, X_in, initial_state=_init_state, time_major=False)
File "/home/xiangzhang/.local/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py", line 836, in dynamic_rnn
dtype=dtype)
File "/home/xiangzhang/.local/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py", line 1003, in _dynamic_rnn_loop
swap_memory=swap_memory)
File "/home/xiangzhang/.local/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2518, in while_loop
result = context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "/home/xiangzhang/.local/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2356, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "/home/xiangzhang/.local/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2306, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "/home/xiangzhang/.local/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py", line 988, in _time_step
(output, new_state) = call_cell()
File "/home/xiangzhang/.local/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py", line 974, in <lambda>
call_cell = lambda: cell(input_t, state)
File "/home/xiangzhang/.local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell.py", line 310, in __call__
concat = _linear([inputs, h], 4 * self._num_units, True)
File "/home/xiangzhang/.local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell.py", line 907, in _linear
res = math_ops.matmul(array_ops.concat(1, args), matrix)
File "/home/xiangzhang/.local/lib/python2.7/site-packages/tensorflow/python/ops/array_ops.py", line 872, in concat
name=name)
File "/home/xiangzhang/.local/lib/python2.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 436, in _concat
values=values, name=name)
File "/home/xiangzhang/.local/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 749, in apply_op
op_def=op_def)
File "/home/xiangzhang/.local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2380, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/home/xiangzhang/.local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1298, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): ConcatOp : Dimensions of inputs should match: shape[0] = [10,128] vs. shape[1] = [70,128]
[[Node: RNN/while/BasicLSTMCell/Linear/concat = Concat[N=2, T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](RNN/while/BasicLSTMCell/Linear/concat/concat_dim, RNN/while/TensorArrayRead, RNN/while/Identity_3)]]
Process finished with exit code 1
I thought the reason may be that the testing dataset has only 10 samples, fewer than batch_size=70, so that when I run the testing dataset the line _init_state = lstm_cell.zero_state(batch_size, dtype=tf.float32) produces the mismatch error.
There are two ways to solve it, but I don't know how to implement either of them:
Change the batch_size value: set it to 70 when training and 10 when testing. But I don't know how to code that; please tell me how to do it.
Or set batch_size=10 and read the training dataset ten samples at a time. I also don't know how to read the next batch automatically in TensorFlow; the next_batch command from the MNIST dataset does not work here.
The second solution is particularly important; please kindly help me, thank you very much.
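Not a full answer, but a minimal sketch of both ideas against the code above (all names taken from the question). For the first option, the graph can be made batch-size independent by letting dynamic_rnn build its own zero state from whatever batch it is fed; for the second, the numpy arrays can simply be sliced instead of relying on an MNIST-style next_batch:

# Option 1: inside RNN(), drop the hard-coded zero_state(batch_size, ...) and let
# dynamic_rnn create an all-zero initial state sized to the actual batch that is fed in
outputs, final_state = tf.nn.dynamic_rnn(lstm_cell, X_in,
                                         dtype=tf.float32, time_major=False)

# Option 2: in the training loop, feed the 70 training samples in mini-batches of 10
batch_size = 10
for start in range(0, len(a), batch_size):
    batch_xs = a[start:start + batch_size]
    batch_ys = label_training[start:start + batch_size]
    sess.run([train_op], feed_dict={x: batch_xs, y: batch_ys})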
I'm using the code pasted below. The 'forward' part of the code seems to work, by virtue of the "assert root_emb == 1 + emb[0] * emb[1]" passing. However, once a training step is taken (the line following the assert), a strange error appears suggesting an issue with the TensorArray written to during the while loop.
tensorflow.python.framework.errors.InvalidArgumentError: TensorArray
TensorArray#gradients: Could not read from TensorArray index 2 because
it has not yet been written to. [[Node:
gradients/while/TensorArrayWrite_grad/TensorArrayRead =
TensorArrayRead[_class=["loc:#TensorArray"], dtype=DT_FLOAT,
_device="/job:localhost/replica:0/task:0/cpu:0"](gradients/while/TensorArrayWrite_grad/TensorArrayGrad/TensorArrayGrad,
gradients/while/TensorArrayWrite_grad/TensorArrayRead/StackPop,
gradients/while/TensorArrayWrite_grad/TensorArrayGrad/gradient_flow)]]
Caused by op u'gradients/while/TensorArrayWrite_grad/TensorArrayRead',
defined at: File "minimal.py", line 82, in
model = TreeRNN(8, 1, 1, degree=2) File "minimal.py", line 61, in init
self.grad = tf.gradients(self.loss, self.params) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/gradients.py",
line 481, in gradients
in_grads = _AsList(grad_fn(op, *out_grads)) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/tensor_array_grad.py",
line 115, in _TensorArrayWriteGrad
grad = g.read(index) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/tensor_array_ops.py",
line 177, in read
dtype=self._dtype, name=name) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/gen_data_flow_ops.py",
line 781, in _tensor_array_read
flow_in=flow_in, dtype=dtype, name=name) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/op_def_library.py",
line 694, in apply_op
op_def=op_def) File "/Library/Python/2.7/site-packages/tensorflow/python/framework/ops.py",
line 2154, in create_op
original_op=self._default_original_op, op_def=op_def) File "/Library/Python/2.7/site-packages/tensorflow/python/framework/ops.py",
line 1154, in init
self._traceback = _extract_stack()
...which was originally created as op u'while/TensorArrayWrite',
defined at: File "minimal.py", line 82, in
model = TreeRNN(8, 1, 1, degree=2) File "minimal.py", line 50, in init
loop_vars=(self.time, node_emb, tf.zeros([1]))) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/control_flow_ops.py",
line 1681, in While
back_prop=back_prop, swap_memory=swap_memory, name=name) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/control_flow_ops.py",
line 1671, in while_loop
result = context.BuildLoop(cond, body, loop_vars) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/control_flow_ops.py",
line 1572, in BuildLoop
body_result = body(*vars_for_body_with_tensor_arrays) File "minimal.py", line 43, in _recurrence
new_node_emb = node_emb.write(children_and_parent[-1], parent_emb) File
"/Library/Python/2.7/site-packages/tensorflow/python/ops/tensor_array_ops.py",
line 200, in write
name=name) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/gen_data_flow_ops.py",
line 875, in _tensor_array_write
value=value, flow_in=flow_in, name=name) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/op_def_library.py",
line 694, in apply_op
op_def=op_def)
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import tensor_array_ops, control_flow_ops
class TreeRNN(object):

    def __init__(self, num_emb, emb_dim, output_dim, degree=2, learning_rate=0.01):
        self.num_emb = num_emb
        self.emb_dim = emb_dim
        self.output_dim = output_dim
        self.degree = degree
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.embeddings = tf.Variable(self.init_matrix([self.num_emb, self.emb_dim]))
        self.recursive_unit = self.create_recursive_unit()
        self.W_out = tf.Variable(self.init_matrix([self.output_dim, self.emb_dim]))
        self.b_out = tf.Variable(self.init_vector([self.output_dim]))

        self.x = tf.placeholder(tf.int32, shape=[None])  # word indices
        self.tree = tf.placeholder(tf.int32, shape=[None, self.degree + 1])
        self.y = tf.placeholder(tf.float32, shape=[self.output_dim])

        num_words, = tf.unpack(tf.shape(self.x), 1)  # also num leaves
        emb_x = tf.gather(self.embeddings, self.x)
        node_emb = tensor_array_ops.TensorArray(
            dtype=tf.float32, size=num_words - 1, dynamic_size=True,
            clear_after_read=False)
        node_emb = node_emb.unpack(emb_x)

        num_nodes, _ = tf.unpack(tf.shape(self.tree), 2)  # num internal nodes
        tree_traversal = tensor_array_ops.TensorArray(
            dtype=tf.int32, size=num_nodes)
        tree_traversal = tree_traversal.unpack(self.tree)

        def _recurrence(t, node_emb, _):
            node_info = tree_traversal.read(t)
            children_and_parent = tf.unpack(node_info, self.degree + 1)
            child_emb = []
            for i in xrange(self.degree):
                child_emb.append(node_emb.read(children_and_parent[i]))
            parent_emb = self.recursive_unit(child_emb)
            new_node_emb = node_emb.write(children_and_parent[-1], parent_emb)
            return t + 1, new_node_emb, parent_emb

        self.time = tf.constant(0, dtype=tf.int32, name='time')
        _, _, final_emb = control_flow_ops.While(
            cond=lambda t, _1, _2: t < num_nodes,
            body=_recurrence,
            loop_vars=(self.time, node_emb, tf.zeros([1])))
        self.final_state = final_emb

        self.pred_y = self.activation(
            tf.matmul(self.W_out, tf.reshape(self.final_state, [self.emb_dim, 1]))
            + self.b_out)
        self.loss = self.loss_fn(self.y, self.pred_y)

        self.params = tf.trainable_variables()
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        self.grad = tf.gradients(self.loss, self.params)
        self.updates = opt.apply_gradients(zip(self.grad, self.params))

    def init_matrix(self, shape):
        return tf.random_normal(shape, stddev=0.1)

    def init_vector(self, shape):
        return tf.zeros(shape)

    def create_recursive_unit(self):
        def unit(child_emb):  # very simple
            return 1 + child_emb[0] * child_emb[1]
        return unit

    def activation(self, inp):
        return tf.sigmoid(inp)

    def loss_fn(self, y, pred_y):
        return tf.reduce_sum(tf.square(y - pred_y))
model = TreeRNN(8, 1, 1, degree=2)

sess = tf.Session()
sess.run(tf.initialize_all_variables())

root_emb = sess.run([model.final_state],
                    feed_dict={model.x: np.array([0, 1]), model.tree: np.array([[0, 1, 2]])})
emb, = sess.run([model.embeddings])
assert root_emb == 1 + emb[0] * emb[1]

out = sess.run([model.updates, model.loss],
               feed_dict={model.x: np.array([0, 1]),
                          model.tree: np.array([[0, 1, 2]]),
                          model.y: np.array([0])})
Set parallel_iterations=1 in tf.while_loop.
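In the code above that would mean building the loop with tf.while_loop (assuming a TensorFlow version that provides it) and passing the flag explicitly; a minimal sketch keeping the cond/body/loop_vars from the question:

# parallel_iterations=1 forces the loop iterations to run strictly in order, so each
# TensorArray.write has completed before the backward pass tries to read that index
_, _, final_emb = tf.while_loop(
    cond=lambda t, _1, _2: t < num_nodes,
    body=_recurrence,
    loop_vars=(self.time, node_emb, tf.zeros([1])),
    parallel_iterations=1)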