Error on tensorflow: Shape must be rank 2 but is rank 1 for 'MatMul_25' - tensorflow

I'm trying to create a conditional GAN. However, I'm stuck: no matter what I do, the same error appears over and over again.
Here's the code:
# Layer sizes used by the conditional GAN below.
image_dim = 784 #28 * 28
Y_dimension = 10  # width of the label tensor concatenated onto both network inputs
gen_hidd_dim = 256  # generator hidden-layer width
disc_hidd_dim = 256  # discriminator hidden-layer width
z_noise_dim =100 #input noise datapoint
def xavier_init(shape):
    """Return a random-normal initial value for a tensor of the given shape.

    Args:
        shape: Sequence of ints. ``shape[0]`` is used to scale the standard
            deviation.

    Returns:
        A ``tf.random_normal`` tensor of ``shape`` whose standard deviation is
        ``1 / sqrt(shape[0] / 2)``.
    """
    stddev = 1 / tf.sqrt(shape[0] / 2.0)
    return tf.random_normal(shape=shape, stddev=stddev)
# Trainable weight matrices, keyed by layer name.
weights = {
'disc_H' : tf.Variable(xavier_init([image_dim + Y_dimension, disc_hidd_dim])),
'disc_final' : tf.Variable(xavier_init([disc_hidd_dim, 1])),
# NOTE(review): unlike the other entries, 'gen_H' wraps the shape list itself
# instead of xavier_init(shape), so the variable holds the two integers
# [110, 256] rather than a 110 x 256 matrix.
'gen_H': tf.Variable([z_noise_dim + Y_dimension, gen_hidd_dim]),
'gen_final': tf.Variable(xavier_init([gen_hidd_dim, image_dim]))
}
# Trainable bias vectors, keyed like `weights`.
bias = {
'disc_H': tf.Variable(xavier_init([disc_hidd_dim])),
'disc_final': tf.Variable(xavier_init([1])),
'gen_H': tf.Variable(xavier_init([gen_hidd_dim])),
'gen_final': tf.Variable(xavier_init([image_dim]))
}
# Graph inputs: noise, labels and real images; the first dimension is left open.
Z_input = tf.placeholder(tf.float32, shape= [None, z_noise_dim ], name = 'input_noise')
Y_input = tf.placeholder(tf.float32, shape= [None, Y_dimension], name='Labels')
X_input = tf.placeholder(tf.float32, shape=[None, image_dim], name = 'real_input')
def Discriminator(x,y):
    """Score inputs ``x`` conditioned on labels ``y``.

    Args:
        x: Tensor concatenated with ``y`` along axis 1.
        y: Label tensor concatenated onto ``x`` along axis 1.

    Returns:
        A tuple ``(final_layer, disc_output)``: the output of the final
        affine layer and its sigmoid.
    """
    inputs = tf.concat(axis=1, values=[x, y])
    hidden_pre = tf.add(tf.matmul(inputs, weights['disc_H']), bias['disc_H'])
    hidden_layer = tf.nn.relu(hidden_pre)
    final_layer = tf.add(tf.matmul(hidden_layer, weights['disc_final']), bias['disc_final'])
    disc_output = tf.nn.sigmoid(final_layer)
    return final_layer, disc_output
def Generator(x,y):
    """Generate an output from noise ``x`` conditioned on labels ``y``.

    Args:
        x: Noise tensor concatenated with ``y`` along axis 1.
        y: Label tensor concatenated onto ``x`` along axis 1.

    Returns:
        The sigmoid of the final affine layer.
    """
    inputs = tf.concat(axis=1, values=[x, y])
    # Every operand of the hidden layer is cast to float32 before use.
    hidden_pre = tf.add(
        tf.matmul(tf.cast(inputs, tf.float32), tf.cast(weights['gen_H'], tf.float32)),
        tf.cast(bias['gen_H'], tf.float32),
    )
    hidden_layer = tf.nn.relu(hidden_pre)
    final_layer = tf.add(tf.matmul(hidden_layer, weights['gen_final']), bias['gen_final'])
    gen_output = tf.nn.sigmoid(final_layer)
    return gen_output
output_Gen = Generator(Z_input, Y_input)
Right after executing the Generator, I get the following error:
ValueError: Shape must be rank 2 but is rank 1 for 'MatMul_25' (op: 'MatMul') with input shapes: [?,110], [2].
What to do?

I think you just missed one call to xavier_init() when initialising your weights.
You have this:
weights = {
'disc_H' : tf.Variable(xavier_init([image_dim + Y_dimension, disc_hidd_dim])),
'disc_final' : tf.Variable(xavier_init([disc_hidd_dim, 1])),
# 'gen_H' is created from the shape list itself, so the variable has shape [2].
'gen_H': tf.Variable([z_noise_dim + Y_dimension, gen_hidd_dim]),
'gen_final': tf.Variable(xavier_init([gen_hidd_dim, image_dim]))
}
but I think you want this:
weights = {
'disc_H' : tf.Variable(xavier_init([image_dim + Y_dimension, disc_hidd_dim])),
'disc_final' : tf.Variable(xavier_init([disc_hidd_dim, 1])),
# 'gen_H' now goes through xavier_init like the other entries, giving a [110, 256] matrix.
'gen_H': tf.Variable(xavier_init([z_noise_dim + Y_dimension, gen_hidd_dim])),
'gen_final': tf.Variable(xavier_init([gen_hidd_dim, image_dim]))
}
The error message appeared because weights['gen_H'] had shape [2], whereas you expected it to have shape [110, 256]. The call to tf.matmul() therefore failed: it is impossible to matrix-multiply a matrix of shape [m, 110] by a rank-1 tensor of shape [2].

Related

ValueError: Error when checking target: expected dense_1 to have 2 dimensions, but got array with shape (1188, 12, 2)

When I try to build an LSTM network, I always get "ValueError: Error when checking target: expected dense_1 to have 2 dimensions, but got array with shape (1188, 12, 2)".
My dataset has more than 1000 samples and 2 features, and I set the time_step to 12.
I have already reshaped my dataset to 3 dimensions; however, the error says that my last layer, a Dense layer (which I use as the output), expects a 2-dimensional array. What should I do?
My code is as follows:
# read train set
readColsPro = (7, 20)
filename = 'train_set.txt'
# NOTE(review): presumably `rows` is a row range, which would make the targets
# the inputs shifted by one row — confirm against readCsv.csvMat.
xProTrain_1 = readCsv.csvMat(filename, 1, cols=readColsPro, rows=[0, 1200])
yProTrain_1 = readCsv.csvMat(filename, 1, cols=readColsPro, rows=[1, 1201])
xProTrain_1 = xProTrain_1.reshape(xProTrain_1.shape[0], 2)
yProTrain_1 = yProTrain_1.reshape(yProTrain_1.shape[0], 2)
# replace NaN values in the second column with 0
for i in xProTrain_1:
    if np.isnan(i[1]):
        i[1] = 0
for i in yProTrain_1:
    if np.isnan(i[1]):
        i[1] = 0
# read test set
# NOTE(review): the test set is read from the same file as the training set,
# with the row arguments shifted by one — confirm this is intended.
xProTest_1 = readCsv.csvMat(filename, 1, cols=readColsPro, rows=[1, 1201])
yProTest_1 = readCsv.csvMat(filename, 1, cols=readColsPro, rows=[2, 1202])
xProTest_1 = np.reshape(xProTest_1, (xProTest_1.shape[0], xProTest_1.shape[1]))
yProTest_1 = np.reshape(yProTest_1, (yProTest_1.shape[0], yProTest_1.shape[1]))
for i in xProTest_1:
    if np.isnan(i[1]):
        i[1] = 0
for i in yProTest_1:
    if np.isnan(i[1]):
        i[1] = 0
# parameters
timeStepPro = 12
epoch = 24
batch_size = 24
trainNumPro = xProTrain_1.shape[0]
# NOTE(review): testNumPro is taken from the *training* targets and is not used
# below; the test windows are also sized with trainNumPro.
testNumPro = yProTrain_1.shape[0]
# Stack sliding windows of timeStepPro consecutive rows into 3-D training arrays.
xProTrain_2 = np.array(
    [xProTrain_1[i - timeStepPro:i] for i in range(timeStepPro, trainNumPro)]
)
# NOTE(review): the targets are windowed too, so yProTrain_2 is 3-D
# (one timeStepPro-row window per sample) rather than one row per sample.
yProTrain_2 = np.array(
    [yProTrain_1[i - timeStepPro:i] for i in range(timeStepPro, trainNumPro)]
)
print(xProTrain_2.shape)
print(yProTrain_2.shape)
# Same windowing for the test arrays.
xProTest_2 = np.array(
    [xProTest_1[i - timeStepPro:i] for i in range(timeStepPro, trainNumPro)]
)
yProTest_2 = np.array(
    [yProTest_1[i - timeStepPro:i] for i in range(timeStepPro, trainNumPro)]
)
# define network
# Five stacked LSTM layers followed by a 2-unit Dense output layer.
# NOTE(review): the last LSTM uses return_sequences=False, so Dense(units=2)
# produces one 2-value vector per sample, while the yProTrain_2 passed to fit()
# is built from 12-row windows. This is consistent with the reported
# "expected dense_1 to have 2 dimensions" error.
modelA = Sequential()
modelA.add(LSTM(units=64, return_sequences=True,
input_shape=[xProTrain_2.shape[1], 2]))
modelA.add(BatchNormalization())
modelA.add(LSTM(units=128, return_sequences=True))
modelA.add(LSTM(units=128, return_sequences=True))
modelA.add(LSTM(units=256, return_sequences=True))
modelA.add(LSTM(units=64, return_sequences=False))
modelA.add(Dense(units=2, activation='relu'))
modelA.compile(optimizer='adam',
loss='mean_squared_error',
metrics=['accuracy'])
modelA.fit(x=xProTrain_2, y=yProTrain_2, epochs=epoch, batch_size=batch_size)
The error message is as follows:
ValueError: Error when checking target: expected dense_1 to have 2 dimensions, but got array with shape (1188, 12, 2)

How do I make the bounding boxes in yolo v3 tighter (closer to the objects)?

I'm following this repo on creating a YOLO v3 model from scratch in PyTorch. The only problem is that the bounding boxes are not as tight (close to the objects) in most of the images I tried. I compared them with the tutorial that builds a YOLO v3 model in TensorFlow. The TensorFlow model produces excellent bounding boxes that are as tight as possible around the objects.
I tried to understand how the calculations are different between the two, but I'm finding myself getting stuck with the differences between torch and tf.
I believe the code for the bounding boxes in the tf tutorial comes from here:
def yolo_layer(inputs, n_classes, anchors, img_size, data_format):
    """Creates Yolo final detection layer.

    Detects boxes with respect to anchors.

    Args:
        inputs: Tensor input.
        n_classes: Number of labels.
        anchors: A list of anchor sizes.
        img_size: The input size of the model.
        data_format: The input format; 'channels_first' is handled explicitly,
            any other value is treated as channels-last.

    Returns:
        Tensor of shape [-1, n_anchors * grid_h * grid_w, 5 + n_classes]
        holding, along the last axis, box centers (2), box shapes (2),
        confidence (1) and class scores (n_classes).
    """
    n_anchors = len(anchors)

    # 1x1 convolution producing (5 + n_classes) values per anchor per cell.
    inputs = tf.layers.conv2d(inputs, filters=n_anchors * (5 + n_classes),
                              kernel_size=1, strides=1, use_bias=True,
                              data_format=data_format)

    shape = inputs.get_shape().as_list()
    grid_shape = shape[2:4] if data_format == 'channels_first' else shape[1:3]
    if data_format == 'channels_first':
        # Move channels last so the reshape below groups values per box.
        inputs = tf.transpose(inputs, [0, 2, 3, 1])
    inputs = tf.reshape(inputs, [-1, n_anchors * grid_shape[0] * grid_shape[1],
                                 5 + n_classes])

    strides = (img_size[0] // grid_shape[0], img_size[1] // grid_shape[1])

    box_centers, box_shapes, confidence, classes = \
        tf.split(inputs, [2, 2, 1, n_classes], axis=-1)

    # Per-cell grid offsets, repeated once per anchor.
    # NOTE(review): x ranges over grid_shape[0] and y over grid_shape[1];
    # the two only differ for non-square grids — confirm the intended order.
    x = tf.range(grid_shape[0], dtype=tf.float32)
    y = tf.range(grid_shape[1], dtype=tf.float32)
    x_offset, y_offset = tf.meshgrid(x, y)
    x_offset = tf.reshape(x_offset, (-1, 1))
    y_offset = tf.reshape(y_offset, (-1, 1))
    x_y_offset = tf.concat([x_offset, y_offset], axis=-1)
    x_y_offset = tf.tile(x_y_offset, [1, n_anchors])
    x_y_offset = tf.reshape(x_y_offset, [1, -1, 2])

    # Centers: sigmoid offset within the cell, plus the cell index, scaled by the stride.
    box_centers = tf.nn.sigmoid(box_centers)
    box_centers = (box_centers + x_y_offset) * strides

    # Shapes: exponential of the raw value times the matching anchor.
    anchors = tf.tile(anchors, [grid_shape[0] * grid_shape[1], 1])
    box_shapes = tf.exp(box_shapes) * tf.to_float(anchors)

    confidence = tf.nn.sigmoid(confidence)
    classes = tf.nn.sigmoid(classes)

    inputs = tf.concat([box_centers, box_shapes,
                        confidence, classes], axis=-1)
    return inputs
While the code for the bounding boxes for the pytorch model comes from here, and the explanation:
def bbox_iou(box1, box2):
    """Return the element-wise IoU of two sets of bounding boxes.

    Args:
        box1: 2-D tensor whose columns 0..3 are x1, y1, x2, y2.
        box2: 2-D tensor with the same column layout, broadcastable
            against ``box1`` row-wise.

    Returns:
        1-D tensor with the intersection-over-union of each box pair.
        Widths and heights are computed as ``x2 - x1 + 1`` / ``y2 - y1 + 1``,
        i.e. both end coordinates are counted.
    """
    # Corner coordinates of each box
    b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
    b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]

    # Corner coordinates of the intersection rectangle
    inter_rect_x1 = torch.max(b1_x1, b2_x1)
    inter_rect_y1 = torch.max(b1_y1, b2_y1)
    inter_rect_x2 = torch.min(b1_x2, b2_x2)
    inter_rect_y2 = torch.min(b1_y2, b2_y2)

    # Intersection area; clamped to 0 when the boxes do not overlap
    inter_width = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0)
    inter_height = torch.clamp(inter_rect_y2 - inter_rect_y1 + 1, min=0)
    inter_area = inter_width * inter_height

    # Union area = sum of both areas minus the shared part
    b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
    b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)

    iou = inter_area / (b1_area + b2_area - inter_area)
    return iou
def predict_transform(prediction, inp_dim, anchors, num_classes, CUDA = True):
    """Turn a raw detection feature map into per-box predictions.

    Args:
        prediction: Tensor expected as
            (batch, num_anchors * (5 + num_classes), grid, grid).
        inp_dim: Size of the network input; the stride is
            ``inp_dim // prediction.size(2)``.
        anchors: Sequence of 2-item anchor sizes in the same units as
            ``inp_dim``.
        num_classes: Number of class scores per box.
        CUDA: Kept for backward compatibility. Helper tensors are created on
            ``prediction.device``, so the flag no longer has to match the
            device of ``prediction``.

    Returns:
        Tensor of shape (batch, grid * grid * num_anchors, 5 + num_classes).
        Along the last axis: box center (2) and box size (2) scaled by the
        stride, sigmoid object confidence (1) and sigmoid class scores.
    """
    batch_size = prediction.size(0)
    stride = inp_dim // prediction.size(2)
    grid_size = inp_dim // stride
    bbox_attrs = 5 + num_classes
    num_anchors = len(anchors)
    device = prediction.device

    # Flatten the grid and regroup so each row holds the attributes of one box.
    prediction = prediction.view(batch_size, bbox_attrs * num_anchors, grid_size * grid_size)
    prediction = prediction.transpose(1, 2).contiguous()
    prediction = prediction.view(batch_size, grid_size * grid_size * num_anchors, bbox_attrs)

    # Sigmoid the centre_X, centre_Y and object confidence
    prediction[:, :, 0] = torch.sigmoid(prediction[:, :, 0])
    prediction[:, :, 1] = torch.sigmoid(prediction[:, :, 1])
    prediction[:, :, 4] = torch.sigmoid(prediction[:, :, 4])

    # Add the center offsets: x varies fastest, y changes once per grid row,
    # and every cell's offset is repeated once per anchor.
    cells = torch.arange(grid_size, dtype=torch.float32, device=device)
    x_offset = cells.repeat(grid_size).view(-1, 1)
    y_offset = cells.repeat_interleave(grid_size).view(-1, 1)
    x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1, num_anchors).view(-1, 2).unsqueeze(0)
    prediction[:, :, :2] += x_y_offset

    # Log-space transform of the box size, using anchors expressed in grid units
    scaled_anchors = torch.tensor(
        [(a[0] / stride, a[1] / stride) for a in anchors],
        dtype=torch.float32,
        device=device,
    )
    scaled_anchors = scaled_anchors.repeat(grid_size * grid_size, 1).unsqueeze(0)
    prediction[:, :, 2:4] = torch.exp(prediction[:, :, 2:4]) * scaled_anchors

    prediction[:, :, 5:5 + num_classes] = torch.sigmoid(prediction[:, :, 5:5 + num_classes])

    # Scale centers and sizes from grid units back to input units.
    prediction[:, :, :4] *= stride
    return prediction

ctc_loss error "No valid path found."

Training a model with tf.nn.ctc_loss produces an error every time the train op is run:
tensorflow/core/util/ctc/ctc_loss_calculator.cc:144] No valid path found.
Unlike in previous questions about this function, this is not due to divergence. I have a low learning rate, and the error occurs on even the first train op.
The model is a CNN -> LSTM -> CTC. Here is the model creation code:
# Build Graph
self.videoInput = tf.placeholder(shape=(None, self.maxVidLen, 50, 100, 3), dtype=tf.float32)
self.videoLengths = tf.placeholder(shape=(None), dtype=tf.int32)
self.keep_prob = tf.placeholder(dtype=tf.float32)
self.targets = tf.sparse_placeholder(tf.int32)
self.targetLengths = tf.placeholder(shape=(None), dtype=tf.int32)
conv1 = tf.layers.conv3d(self.videoInput ...)
pool1 = tf.layers.max_pooling3d(conv1 ...)
conv2 = ...
pool2 = ...
conv3 = ...
pool3 = ...
cnn_out = tf.reshape(pool3, shape=(-1, self.maxVidLength, 4*7*96))
fw_cell = tf.nn.rnn_cell.MultiRNNCell(self.cell(), for _ in range(3))
bw_cell = tf.nn.rnn_cell.MultiRNNCell(self.cell(), for _ in range(3))
outputs, _ = tf.nn.bidirectional_dynamic_rnn(
fw_cell, bw_cell, cnn_out, sequence_length=self.videoLengths, dtype=tf.float32)
outputs = tf.concat(outputs, 2)
outputs = tf.reshape(outputs, [-1, self.hidden_size * 2])
w = tf.Variable(tf.random_normal((self.hidden_size * 2, len(self.char2index) + 1), stddev=0.2))
b = tf.Variable(tf.zeros(len(self.char2index) + 1))
out = tf.matmul(outputs, w) + b
out = tf.reshape(out, [-1, self.maxVidLen, len(self.char2index) + 1])
out = tf.transpose(out, [1, 0, 2])
cost = tf.reduce_mean(tf.nn.ctc_loss(self.targets, out, self.targetLengths))
self.train_op = tf.train.AdamOptimizer(0.0001).minimize(cost)
And here is the feed dict creation code:
# Sparse-tensor pieces for the labels: (row, position) indices, label ids, dense shape.
indices = []
values = []
shape = [len(vids) * 2, self.maxLabelLen]
vidInput = np.zeros((len(vids) * 2, self.maxVidLen, 50, 100, 3), dtype=np.float32)
# Actual video, then left-right flip
for j in range(len(vids) * 2):
    # k is the index of the source video: rows j and j + len(vids) share one video
    k = j % len(vids)
    # convert video and label to input format
    # NOTE(review): [:, ::-1, :] reverses the second axis of the video (the
    # size-50 axis of the placeholder) — confirm that this is the horizontal axis.
    vidInput[j, 0:len(vids[k])] = vids[k] if k == j else vids[k][:,::-1,:]
    indices.extend([j, i] for i in range(len(labelList[k])))
    values.extend(self.char2index[c] for c in labelList[k])
fd[self.targets] = (indices, values, shape)
fd[self.videoInput] = vidInput
# Collect video lengths and label lengths (listed twice: originals, then flipped copies)
vidLengths = [len(v) for v in vids] * 2
labelLens = [len(label) for label in labelList] * 2
fd[self.videoLengths] = vidLengths
fd[self.targetLengths] = labelLens
It turns out that the ctc_loss requires that the label lengths be shorter than the input lengths. If the label lengths are too long, the loss calculator cannot unroll completely and therefore cannot compute the loss.
For example, the label BIFI would require input length of at least 4 while the label BIIF would require input length of at least 5 due to a blank being inserted between the repeated symbols.
I had the same issue, but I soon realized it was because I was using glob and my label was taken from the filename, so the label (which included the directory path) exceeded the input length.
You can fix this issue by using:
os.path.join(*(filename.split(os.path.sep)[noOfDir:]))
For me the problem was fixed by setting preprocess_collapse_repeated=True.
FWIW: My target sequence length was already shorter than inputs, and the RNN outputs are that of softmax.
Another possible cause, which I found in my case, is that the input data was not normalized to the range 0~1. Because of that, the LSTM activation function saturated at the beginning of training, which somehow triggered the "No valid path found" log.

Tensorflow - apply function over 1D Tensor

I have a function dice
def dice(yPred,yTruth,thresh):
    """Compute a smoothed Dice score of thresholded predictions.

    Args:
        yPred: Tensor of predicted values.
        yTruth: Tensor of ground-truth values, multiplied element-wise with
            the thresholded predictions.
        thresh: Threshold; predictions ``>= thresh`` become 1.0, others 0.0.

    Returns:
        A tuple ``(score, yPredThresh)`` where ``score`` is
        ``(2 * sum(yPredThresh * yTruth) + 1) / (sum(yPredThresh) + sum(yTruth) + 1)``
        and ``yPredThresh`` is the thresholded prediction tensor.
    """
    smooth = tf.constant(1.0)
    threshold = tf.constant(thresh)
    yPredThresh = tf.to_float(tf.greater_equal(yPred, threshold))
    # The `*` operator is used instead of tf.mul, which newer TensorFlow releases removed.
    overlap = yPredThresh * yTruth
    intersection = 2 * tf.reduce_sum(overlap) + smooth
    union = tf.reduce_sum(yPredThresh) + tf.reduce_sum(yTruth) + smooth
    score = intersection / union
    return score, yPredThresh
which works. An example is given here
# Evaluate dice() on a 3x3 example with a single threshold.
with tf.Session() as sess:
    thresh = 0.5
    print("Dice example")
    yPred = tf.constant([0.1,0.9,0.7,0.3,0.1,0.1,0.9,0.9,0.1],shape=[3,3])
    yTruth = tf.constant([0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0],shape=[3,3])
    diceScore, yPredThresh= dice(yPred=yPred,yTruth=yTruth,thresh= thresh)
    diceScore_ , yPredThresh_ , yPred_, yTruth_ = sess.run([diceScore,yPredThresh,yPred, yTruth])
    print("\nScore = {0}".format(diceScore_))
>>> Score = 0.899999976158
I would like to be able to loop over the third argument of dice, thresh. I do not know the best way to do this such that I can extract the result from the graph. Something along the lines of the following...
def diceROC(yPred,yTruth,thresholds=np.linspace(0.1,0.9,20)):
    """Attempt to collect the Dice score for each threshold.

    Args:
        yPred: Tensor of predicted values, passed through to ``dice``.
        yTruth: Tensor of ground-truth values, passed through to ``dice``.
        thresholds: NumPy array of thresholds; cast to float32 before use.

    Returns:
        ``diceScores``, a tensor created with one slot per threshold.
    """
    thresholds = thresholds.astype(np.float32)
    nThreshs = thresholds.size
    diceScores = tf.zeros(shape=nThreshs)
    for i in xrange(nThreshs):
        score,_ = dice(yPred,yTruth,thresholds[i])
        # NOTE(review): this is the statement that raises
        # "'Tensor' object does not support item assignment".
        diceScores[i] = score
    return diceScores
Evaluating diceScoreROC yields the error 'Tensor' object does not support item assignment as I can't loop into and slice a tf tensor apparently.
Instead of the loop, I would encourage you to use broadcasting abilities of tensorflow. If you redefine dice to:
def dice(yPred,yTruth,thresh):
    """Compute smoothed Dice scores, reducing over the first two axes only.

    Args:
        yPred: Tensor of predicted values with at least two axes.
        yTruth: Tensor of ground-truth values, multiplied element-wise with
            the thresholded predictions.
        thresh: Threshold(s) compared against ``yPred`` with broadcasting.

    Returns:
        A tuple ``(score, yPredThresh)``. ``score`` keeps every axis after
        the first two, so a 3-D input yields one score per slice of the last
        axis; ``yPredThresh`` is the thresholded prediction tensor.
    """
    smooth = tf.constant(1.0)
    yPredThresh = tf.to_float(tf.greater_equal(yPred, thresh))
    # The `*` operator is used instead of tf.mul, which newer TensorFlow releases removed.
    overlap = yPredThresh * yTruth
    intersection = 2 * tf.reduce_sum(overlap, [0, 1]) + smooth
    union = tf.reduce_sum(yPredThresh, [0, 1]) + tf.reduce_sum(yTruth, [0, 1]) + smooth
    score = intersection / union
    return score, yPredThresh
You will be able to pass 3-dimensional yPred and yTruth (assuming the tensors will be just repeated along the last dimension) and 1-dimensional thresh:
# Evaluate dice() for four thresholds at once by tiling the inputs along a third axis.
with tf.Session() as sess:
    thresh = [0.1,0.9,20, 0.5]
    print("Dice example")
    yPred = tf.constant([0.1,0.9,0.7,0.3,0.1,0.1,0.9,0.9,0.1],shape=[3,3,1])
    # One copy of the 3x3 input per threshold.
    ypred_tiled = tf.tile(yPred, [1,1,4])
    yTruth = tf.constant([0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0],shape=[3,3,1])
    ytruth_tiled = tf.tile(yTruth, [1,1,4])
    diceScore, yPredThresh= dice(yPred=ypred_tiled,yTruth=ytruth_tiled,thresh= thresh)
    diceScore_ = sess.run(diceScore)
    print("\nScore = {0}".format(diceScore_))
You'll get:
Score = [ 0.73333335 0.77777779 0.16666667 0.89999998]

Can embedding_rnn_seq2seq function return all states?

I am experimenting with a Seq2Seq model using the embedding_rnn_seq2seq function.
From the documentation I understood that embedding_rnn_seq2seq returns the outputs and the state for each time step,
but when I try to fetch the state I only get a single step.
here is my model
seq_length = 100
batch_size = 128
vocab_size = 12
memory_dim = 100
# One int32 placeholder per time step, for the encoder inputs and for the labels.
enc_inp = [tf.placeholder(tf.int32, shape=(None,), name="inp%i" % t) for t in range(seq_length)]
labels = [tf.placeholder(tf.int32, shape=(None,), name="labels%i" % t) for t in range(seq_length)]
# Decoder inputs: an all-zero "GO" step followed by the labels shifted right by one step.
dec_inp = ([tf.zeros_like(labels[0], dtype=np.int32, name="GO")] + labels[:-1])
# Weight of 1.0 for every label position.
weights = [tf.ones_like(labels_t, dtype=tf.float32) for labels_t in labels]
cell = rnn_cell.GRUCell(memory_dim)
# NOTE(review): per the TensorFlow seq2seq documentation, the second return value
# is presumably the decoder state at the final time step only, not one state per
# step — confirm for the TensorFlow version in use.
dec_outputs, dec_memory = seq2seq.embedding_rnn_seq2seq(enc_inp,dec_inp,cell,vocab_size,vocab_size,vocab_size)
loss = seq2seq.sequence_loss(dec_outputs, labels, weights, vocab_size)
I try to fetch the state (dec_memory) like this:
dec_memory_batch = sess.run(dec_memory , feed_dict)
It only returns a single vector of size memory_dim, which may be the state of the last step.
Does anyone have any advice?