How can I fix this error: RuntimeError: For unbatched 2-D input, hx should also be 2-D but got 3-D tensor

I am building a GRU network and I keep getting this error. I tried to unsqueeze hx, but I can't, since I would be using the reference before its declaration. When I print the sizes of both h and x, they are indeed different (x: torch.Size([1024, 6]), h: torch.Size([2, 1024, 256])).
Here is the code:
from typing import Optional

import torch
import torch.nn as nn
from torch import Tensor

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

input_dim = 50
hidden_dim = 128
n_layers = 9
output_dim = 64

class GRUNet(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, n_layers, drop_prob=0.2):
        super(GRUNet, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.gru = nn.GRU(input_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x: Tensor, h: Optional[Tensor] = None) -> Tensor:
        print(x.size())
        print(h.size())
        #h.view(-1)
        #h = torch.rand(2, 6, 1024)[:, :, -1]
        #x = torch.rand(2, 6, 1024)[:, :, -1]
        #print(f" size of x : {x.size()}.")
        #print(f" size of h : {h.size()}.")
        out, h = self.gru(x, h)
        #hx = hx.unsqueeze(1)
        out = self.fc(self.relu(out[:, -1]))
        return out, h

    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        hidden = weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device)
        return hidden
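For reference, here is a minimal sketch of the shape contract I understand nn.GRU to enforce (hypothetical sizes, not my real data): a batched 3-D input needs a 3-D hidden state, and an unbatched 2-D input needs a 2-D hidden state. So presumably my x of torch.Size([1024, 6]) is being read as unbatched (seq, features) while my h is batched:

import torch
import torch.nn as nn

gru = nn.GRU(input_size=6, hidden_size=256, num_layers=2, batch_first=True)

# batched: x is (batch, seq, input_size), h0 is (num_layers, batch, hidden_size)
x = torch.randn(1024, 1, 6)    # e.g. x.unsqueeze(1) on a [1024, 6] tensor
h0 = torch.zeros(2, 1024, 256)
out, hn = gru(x, h0)

# unbatched (supported in recent PyTorch): x is (seq, input_size),
# so h0 must then be 2-D, (num_layers, hidden_size)
x2 = torch.randn(1, 6)
h02 = torch.zeros(2, 256)
out2, hn2 = gru(x2, h02)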

Related

Error in defining custom layers using subclasses in keras

I am getting an error in defining the layers of my model. The input to self.layers[i] does not work when I implement X = self.layers[i](A, A) inside the code. But a layer made from GTLayer works fine when I call it separately: A = tf.random.uniform(shape=[2,2]); d = GTLayer(2,2,1); d.call(A,A)
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

class GTN(keras.Model):  # layers.Layer keeps track of everything under the hood!
    def __init__(self, num_edge, num_channels, w_in, w_out, num_class, num_layers, norm):
        super(GTN, self).__init__()
        self.num_layers = 3
        self.num_edge = 6
        self.num_channels = 3
        self.w_in = 2  # tf.random.uniform(shape=[2,2])
        self.w_out = 2  # tf.random.uniform(shape=[2,2])
        self.num_class = 2
        layers = []
        for i in tf.range(num_layers):
            print(4)
            if i == 0:
                layers.append(GTLayer(num_edge, num_channels, first=True))
            else:
                print(i)
                layers.append(GTLayer(num_edge, num_channels, first=False))
        #print((self.w_out*self.num_channels, ))
        self.linear1 = tf.keras.layers.Dense(self.w_out, input_shape=(self.w_out*self.num_channels, ), activation=None)
        self.linear2 = tf.keras.layers.Dense(self.num_class, input_shape=(self.w_out, ), activation=None)

    def call(self, A, X, target_x, target):
        #A = tf.expand_dims(A, 0)
        Ws = []
        print('hello')
        for i in range(self.num_layers):
            print('i:', i)
            if i == 0:
                print('layers:', layers)
                print('layers[i](A):', self.layers[i](A))
                H, W = self.layers[i](A)  # self.layers = nn.ModuleList(layers)
            else:
                #H = self.normalization(H)
                print(H)
                print(W)
                print('A', A)
                X = self.layers[i](A, A)
                print('H_dash', self.layers[i](A))
                H, W = self.layers[i](A, H)
            Ws.append(W)
        for i in range(self.num_channels):
            if i == 0:
                X_ = tf.nn.relu(self.gcn_conv(X, H[i])).numpy()
            else:
                X_tmp = tf.nn.relu(self.gcn_conv(X, H[i])).numpy()
                X_ = tf.concat((X_, X_tmp), dim=1)
        X_ = self.linear1(X_)
        X_ = tf.nn.relu(X_).numpy()
        y = self.linear2(X_[target_x])
        loss = self.loss(y, target)
        return loss, y, Ws

class GTLayer(keras.layers.Layer):
    def __init__(self, in_channels, out_channels, first=True):
        super(GTLayer, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.first = first

    def call(self, A, H_=None):
        if self.first == True:
            a = tf.random.uniform(shape=[2, 2])
            b = tf.random.uniform(shape=[2, 2])
            print(a)
            print(b)
            #H = torch.bmm(np.array(a), np.array(b))
            H = tf.matmul(a, b)
        else:
            a = tf.random.uniform(shape=[2])
            H = tf.random.uniform(shape=[2])
        return H, W
Input used for check:
d = GTN(2,3,4,2,2,2,1)
A = tf.random.uniform(shape=[2,2])
X = tf.random.uniform(shape=[2,2])
t_x = 5
t = 4
d.call(A,X,t_x,t)
Error:
---> 47 X = self.layers[i](A, A)
TypeError: call() takes 2 positional arguments but 3 were given
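A plausible cause, hedged: keras.Model already exposes a built-in layers property that lists every tracked sublayer, which here is only linear1 and linear2 (the local layers list built in __init__ is never stored on self), and Dense.call accepts a single input, which matches the TypeError. A minimal sketch of keeping the custom layers under their own attribute (gt_layers is a hypothetical name):

class GTN(keras.Model):
    def __init__(self, num_edge, num_channels, num_layers):
        super().__init__()
        # avoid colliding with keras.Model's built-in `layers` property
        self.gt_layers = [GTLayer(num_edge, num_channels, first=(i == 0))
                          for i in range(num_layers)]

    def call(self, A):
        H, W = self.gt_layers[0](A)
        for layer in self.gt_layers[1:]:
            H, W = layer(A, H)
        return H, W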

Trouble Training Same Tensorflow Model in PyTorch

I have trained a model in Tensorflow and am having trouble replicating it in PyTorch. The Tensorflow model achieves near 100% accuracy (the task is simple), but the PyTorch model performs at random. I've spent a while trying to figure this out, and can't understand what the problem could be.
The model is trained for the task of binary classification. Given an input utterance describing a quadrant and an (x, y, z) coordinate, the model has to predict whether the (x, z) portion of the coordinate is in the quadrant described. For example, if the input text was "quadrant 1" and the coordinate was (0.5, -, 0.5), then the prediction should be true, but if the region was "quadrant 2" with the same coordinate, then the prediction should be false.
I generated some data and trained the model in Tensorflow using this code:
x_data_placeholder = tf.placeholder(tf.float32, [FLAGS.batch_size, 1], name="x_data")
y_data_placeholder = tf.placeholder(tf.float32, [FLAGS.batch_size, 1], name="y_data")
z_data_placeholder = tf.placeholder(tf.float32, [FLAGS.batch_size, 1], name="z_data")
# text and labels placeholders
text_data = tf.placeholder(tf.int32, [FLAGS.batch_size, maxtextlength])
text_lengths = tf.placeholder(tf.int32, [FLAGS.batch_size])
y_labels_placeholder = tf.placeholder(tf.int64, [FLAGS.batch_size])
# encode text and coordinate
embeddings = tf.Variable(tf.random_uniform([100, embedding_size], -1, -1))
rnn_inputs = tf.nn.embedding_lookup(embeddings, text_data)
rnn_layers = [tf.compat.v1.nn.rnn_cell.LSTMCell(size, initializer=tf.compat.v1.keras.initializers.glorot_normal) for size in [256]]
multi_rnn_cell = tf.compat.v1.nn.rnn_cell.MultiRNNCell(rnn_layers, state_is_tuple=True)
text_outputs, text_fstate = tf.compat.v1.nn.dynamic_rnn(cell=multi_rnn_cell,
                                                        inputs=rnn_inputs,
                                                        dtype=tf.float32,
                                                        sequence_length=text_lengths)
# fully connected layers to map the input coordinates into the same dimension as the LSTM output layer above
x_output_layer = tf.compat.v1.layers.dense(x_data_placeholder, units=FLAGS.fc_column_size, activation=tf.nn.relu, name='x_coordinate')
y_output_layer = tf.compat.v1.layers.dense(y_data_placeholder, units=FLAGS.fc_column_size, activation=tf.nn.relu, name='y_coordinate')
z_output_layer = tf.compat.v1.layers.dense(z_data_placeholder, units=FLAGS.fc_column_size, activation=tf.nn.relu, name='z_coordinate')
# add the representations
total_output_layer = x_output_layer + y_output_layer + z_output_layer + lstm_output_layer
# make the predictions with two fully connected layers
fc_1 = tf.compat.v1.layers.dense(total_output_layer, units=FLAGS.hidden_layer_size, activation=tf.nn.relu, name='fc_1')
logits = tf.compat.v1.layers.dense(fc_1, units=FLAGS.output_dims, activation=None, name='logits')
# train the model
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_labels_placeholder, logits=logits))
optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate, epsilon=1e-7)
gradients, variables = zip(*optimizer.compute_gradients(loss))
gradients, _ = tf.clip_by_global_norm(gradients, FLAGS.gradient_clip_threshold)
optimize = optimizer.apply_gradients(zip(gradients, variables))
# then it'll be trained with sess.run ...
Now for the PyTorch replication:
class BaselineModel(nn.Module):
    def __init__(self):
        super(BaselineModel, self).__init__()
        self.encode_x = nn.Linear(1, embed_size)
        self.encode_y = nn.Linear(1, embed_size)
        self.encode_z = nn.Linear(1, embed_size)
        self._embeddings = nn.Embedding(vocab_size, self.embedding_table_size)
        nn.init.uniform_(self._embeddings.weight, -1.0, 1.0)
        self.num_layers = 1
        self.rnn = nn.LSTM(self.embedding_table_size, self.hidden_size, batch_first=True)
        self.fc_after_text_lstm = nn.Linear(self.hidden_size, 100)
        self.fc = nn.Linear(100, 256)
        self.fc_final = nn.Linear(256, 2)
        self.relu_activation = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def init_hidden(self, batch_size, device='cuda:0'):
        # for LSTM, we need # of layers
        h_0 = torch.zeros(1, batch_size, self.hidden_size).to(device)
        c_0 = torch.zeros(1, batch_size, self.hidden_size).to(device)
        return h_0, c_0

    def forward(self, input_text, x_coordinate=None, y_coordinate=None, z_coordinate=None):
        x_embed = self.relu_activation(self.encode_x(x_coordinate.cuda().to(torch.float32)).cuda())
        y_embed = self.relu_activation(self.encode_y(y_coordinate.cuda().to(torch.float32))).cuda()
        z_embed = self.relu_activation(self.encode_z(z_coordinate.cuda().to(torch.float32))).cuda()
        embeds = self._embeddings(input_text)
        embedding, hidden = self.rnn(embeds, self.hidden)
        text_fc = self.relu_activation(self.fc_after_text_lstm(embedding[:, -1]))
        representations_so_far_added = torch.sum(torch.stack([text_fc, x_embed, y_embed, z_embed]), dim=0)
        pre_final_embedding = self.relu_activation(self.fc(representations_so_far_added))
        return self.fc_final(pre_final_embedding)

### training code
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, eps=1e-7)
criterion = nn.CrossEntropyLoss()
for input_text, x_coordinate, y_coordinate, z_coordinate, targets in train_data:
    optimizer.zero_grad()
    pred = model(input_text, x_coordinate=x_coordinate, y_coordinate=y_coordinate, z_coordinate=z_coordinate)
    loss = criterion(pred.float(), targets)
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
    optimizer.step()
scheduler.step()

# accuracy evaluation code, this is evaluated over the entire epoch
pred_idx = F.log_softmax(pred, dim=1)
target_labels = targets.cpu().int()
pred_labels = torch.argmax(pred_idx, dim=-1).cpu().data.int()
curr_acc = skm.accuracy_score(target_labels, pred_labels)
If anyone can spot any issue with the PyTorch implementation or maybe tell me what could be wrong, that would be much appreciated! I also tried to load the weights of the Tensorflow model into all the appropriate layers, and performance still struggles in PyTorch! Thanks in advance!
EDIT:
I have created a minimally reproducible example, because I still cannot figure out what the problem is. Any help would still be appreciated!
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import sklearn.metrics as skm

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

lr = 0.0005
n_epochs = 10
input_dim = 4
hidden_dim = 128
layer_dim = 2
output_dim = 2
batch_size = 50

class FeatureDataSet(torch.utils.data.Dataset):
    def __init__(self, x_train, y_train, x_coordinates):
        self.x_train = torch.tensor(x_train, dtype=torch.long)
        self.y_train = torch.tensor(y_train)
        self.x_coordinates = torch.tensor(x_coordinates, dtype=torch.float32)

    def __len__(self):
        return len(self.y_train)

    def __getitem__(self, idx):
        return self.x_train[idx], self.y_train[idx], self.x_coordinates[idx]

class RNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, batch_size):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        # linear layer to encode the coordinate
        self.encode_x = nn.Linear(1, hidden_dim).cuda()
        self._embeddings = nn.Embedding(40, 100).cuda()
        # hidden_dim is 128
        # layer_dim is 2
        self.lstm = nn.LSTM(100, hidden_dim, layer_dim, batch_first=True).cuda()
        self.fc = nn.Linear(2 * hidden_dim, output_dim).cuda()
        self.batch_size = batch_size
        self.hidden = None

    def init_hidden(self, x):
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
        return [t.cpu() for t in (h0, c0)]

    def forward(self, x, x_coordinate):
        # initializing the hidden states
        h0, c0 = self.init_hidden(x)
        embeds = self._embeddings(x)
        out, (hn, cn) = self.lstm(embeds.cuda(), (h0.cuda(), c0.cuda()))
        x_embed = F.relu(self.encode_x(x_coordinate.cuda().to(torch.float32)).cuda())
        representations_so_far_added = torch.cat([out[:, -1, :], x_embed], dim=1)
        out = self.fc(representations_so_far_added)
        return out

model = RNN(input_dim, hidden_dim, layer_dim, output_dim, batch_size)
criterion = nn.CrossEntropyLoss()
opt = torch.optim.Adam(model.parameters(), lr=0.001)
print('Start model training')

x_train = []
x_coordinates = []
y_train = []
for i in range(10000):
    # create the data. if x_coordinate > 0 and the sentence says that (represented by [1, 5, 6, 8]),
    # then we should predict positive, else negative (if the x_coordinate > 0);
    # the same applies if the x_coordinate < 0, just that the sentence is now [1, 5, 6, 9]
    if np.random.randint(0, 2) == 0:
        if np.random.randint(0, 2) == 0:
            # x coordinate > 0
            x_train.append([1, 5, 6, 8])
            x_coordinates.append([round(np.random.uniform(0.01, 1.00, 1)[0], 2)])
            y_train.append(1.0)
        else:
            # x coordinate > 0 negative
            x_train.append([1, 5, 6, 8])
            x_coordinates.append([round(np.random.uniform(-1.00, 0.00, 1)[0], 2)])
            y_train.append(0.0)
    else:
        if np.random.randint(0, 2) == 0:
            # x coordinate < 0
            x_train.append([1, 5, 6, 9])
            x_coordinates.append([round(np.random.uniform(-1.00, 0.00, 1)[0], 2)])
            y_train.append(1.0)
        else:
            # x coordinate < 0 negative
            x_train.append([1, 5, 6, 9])
            x_coordinates.append([round(np.random.uniform(0.01, 1.00, 1)[0], 2)])
            y_train.append(0.0)

# print a sample of data
print(x_train[:10])
print(y_train[:10])
print(x_coordinates[:10])

# create a dataloader
trainingDataset = FeatureDataSet(x_train=x_train, y_train=y_train, x_coordinates=x_coordinates)
train_loader = torch.utils.data.DataLoader(dataset=trainingDataset, batch_size=batch_size, shuffle=True)

# for each epoch
for epoch in range(1, n_epochs + 1):
    acc_all = []
    # each batch
    for i, (x_batch, y_batch, x_coord_batch) in enumerate(train_loader):
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        x_coord_batch = x_coord_batch.to(device)
        opt.zero_grad()
        # pass in the text (x_batch) and coordinate (x_coord_batch)
        out = model(x_batch, x_coordinate=x_coord_batch)
        loss = criterion(out.float(), y_batch.type(torch.LongTensor).cuda())
        loss.backward()
        opt.step()
        pred_idx = F.log_softmax(out, dim=1)
        target_labels = y_batch.cpu().int()
        pred_labels = torch.argmax(pred_idx, dim=-1).cpu().data.int()
        curr_acc = skm.accuracy_score(target_labels, pred_labels)
        acc_all.append(curr_acc)
    print(np.mean(acc_all))
I suspect there are some mistakes in the dataset implementation of your PyTorch version.
I tried your PyTorch BaselineModel on both the dataset in your "minimally reproducible example" and my own dataset constructed according to your description, and found that it works fine.
The following is my code for testing on my own dataset. Note that I added several hyperparameters to the BaselineModel code to make it run. I got accuracy over 99%.
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import sklearn.metrics as skm

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

lr = 0.0005
n_epochs = 100
input_dim = 4
hidden_dim = 128
layer_dim = 2
output_dim = 2
batch_size = 50

class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

class FeatureDataSet(torch.utils.data.Dataset):
    def __init__(self, x_train, y_train, x_coordinates, y_coordinates, z_coordinates):
        self.x_train = torch.tensor(x_train, dtype=torch.long)
        self.y_train = torch.tensor(y_train)
        self.x_coordinates = torch.tensor(x_coordinates, dtype=torch.float32)
        self.y_coordinates = torch.tensor(y_coordinates, dtype=torch.float32)
        self.z_coordinates = torch.tensor(z_coordinates, dtype=torch.float32)

    def __len__(self):
        return len(self.y_train)

    def __getitem__(self, idx):
        return self.x_train[idx], self.y_train[idx], self.x_coordinates[idx], self.y_coordinates[idx], self.z_coordinates[idx]

class BaselineModel(nn.Module):
    def __init__(self):
        super(BaselineModel, self).__init__()
        vocab_size = 40
        self.hidden_size = 100
        self.embedding_table_size = self.hidden_size
        self.encode_x = nn.Linear(1, self.hidden_size)
        self.encode_y = nn.Linear(1, self.hidden_size)
        self.encode_z = nn.Linear(1, self.hidden_size)
        self._embeddings = nn.Embedding(vocab_size, self.embedding_table_size)
        nn.init.uniform_(self._embeddings.weight, -1.0, 1.0)
        self.num_layers = 1
        self.rnn = nn.LSTM(self.embedding_table_size, self.hidden_size, batch_first=True)
        self.fc_after_text_lstm = nn.Linear(self.hidden_size, 100)
        self.fc = nn.Linear(100, 256)
        self.fc_final = nn.Linear(256, 2)
        self.relu_activation = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)
        self.hidden = self.init_hidden(batch_size)

    def init_hidden(self, batch_size, device='cuda:0'):
        # for LSTM, we need # of layers
        h_0 = torch.zeros(1, batch_size, self.hidden_size).to(device)
        c_0 = torch.zeros(1, batch_size, self.hidden_size).to(device)
        return h_0, c_0

    def forward(self, input_text, x_coordinate=None, y_coordinate=None, z_coordinate=None):
        x_embed = self.relu_activation(self.encode_x(x_coordinate.cuda().to(torch.float32)).cuda())
        y_embed = self.relu_activation(self.encode_y(y_coordinate.cuda().to(torch.float32))).cuda()
        z_embed = self.relu_activation(self.encode_z(z_coordinate.cuda().to(torch.float32))).cuda()
        embeds = self._embeddings(input_text)
        embedding, hidden = self.rnn(embeds, self.hidden)
        text_fc = self.relu_activation(self.fc_after_text_lstm(embedding[:, -1]))
        representations_so_far_added = torch.sum(torch.stack([text_fc, x_embed, y_embed, z_embed]), dim=0)
        pre_final_embedding = self.relu_activation(self.fc(representations_so_far_added))
        return self.fc_final(pre_final_embedding)

# model = RNN(input_dim, hidden_dim, layer_dim, output_dim, batch_size)
model = BaselineModel().cuda()
criterion = nn.CrossEntropyLoss()
opt = torch.optim.Adam(model.parameters(), lr=0.001)
print('Start model training')

x_train = []
x_coordinates = []
y_coordinates = []
z_coordinates = []
y_train = []
for i in range(10000):
    x_coordinate = round(np.random.uniform(-1, 1.00, 1)[0], 2)
    y_coordinate = round(np.random.uniform(-1, 1.00, 1)[0], 2)
    z_coordinate = round(np.random.uniform(-1, 1.00, 1)[0], 2)
    x_coordinates.append([x_coordinate])
    y_coordinates.append([y_coordinate])
    z_coordinates.append([z_coordinate])
    if np.random.randint(0, 2) == 0:  # positive example
        if x_coordinate <= 0 and z_coordinate <= 0:
            x_train.append([1, 5, 6, 8])
        elif x_coordinate <= 0 and z_coordinate > 0:
            x_train.append([1, 5, 6, 9])
        elif x_coordinate > 0 and z_coordinate <= 0:
            x_train.append([1, 5, 6, 10])
        elif x_coordinate > 0 and z_coordinate > 0:
            x_train.append([1, 5, 6, 11])
        y_train.append(1.0)
    else:
        if x_coordinate <= 0 and z_coordinate <= 0:
            x_train.append(random.choice([[1, 5, 6, 9], [1, 5, 6, 10], [1, 5, 6, 11]]))
        elif x_coordinate <= 0 and z_coordinate > 0:
            x_train.append(random.choice([[1, 5, 6, 8], [1, 5, 6, 10], [1, 5, 6, 11]]))
        elif x_coordinate > 0 and z_coordinate <= 0:
            x_train.append(random.choice([[1, 5, 6, 8], [1, 5, 6, 9], [1, 5, 6, 11]]))
        elif x_coordinate > 0 and z_coordinate > 0:
            x_train.append(random.choice([[1, 5, 6, 8], [1, 5, 6, 9], [1, 5, 6, 10]]))
        y_train.append(0.0)

# print a sample of data
print(x_train[:10])
print(y_train[:10])
print(x_coordinates[:10])
print(y_coordinates[:10])
print(z_coordinates[:10])

# create a dataloader
trainingDataset = FeatureDataSet(x_train=x_train, y_train=y_train, x_coordinates=x_coordinates, y_coordinates=y_coordinates, z_coordinates=z_coordinates)
train_loader = torch.utils.data.DataLoader(dataset=trainingDataset, batch_size=batch_size, shuffle=True)

# for each epoch
loss_meter = AverageMeter()
for epoch in range(1, n_epochs + 1):
    acc_all = []
    # each batch
    loss_meter.reset()
    for i, (x_batch, y_batch, x_coord_batch, y_coord_batch, z_coord_batch) in enumerate(train_loader):
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        x_coord_batch = x_coord_batch.to(device)
        y_coord_batch = y_coord_batch.to(device)
        z_coord_batch = z_coord_batch.to(device)
        opt.zero_grad()
        # pass in the text (x_batch) and coordinate (x_coord_batch)
        out = model(x_batch, x_coordinate=x_coord_batch, y_coordinate=y_coord_batch, z_coordinate=z_coord_batch)
        loss = criterion(out.float(), y_batch.type(torch.LongTensor).cuda())
        loss.backward()
        opt.step()
        pred_idx = F.log_softmax(out, dim=1)
        target_labels = y_batch.cpu().int()
        pred_labels = torch.argmax(pred_idx, dim=-1).cpu().data.int()
        curr_acc = skm.accuracy_score(target_labels, pred_labels)
        acc_all.append(curr_acc)
        loss_meter.update(loss.item())
    print(np.mean(acc_all))
    print("loss is %f" % loss_meter.val)
As for the "minimally reproducible example", I think the model RNN doesn't work is quite reasonable, as I have stated in the comments. I suppose that tensorflow can not fit as well, although I have not tried it. Your "minimally reproducible example" may be unrelated to your main problem.

Getting None Error while creating Spatial Transformer Network

The output feature map of a convolution layer is (Batch, Height, Width, Channels). When we initialize a CNN in TensorFlow, we get a None value in place of Batch. I am trying to implement a Spatial Transformer Network as a custom layer, and to vectorize the layer the way a convolution layer is vectorized, the batch size is required. When I try to initialize the network, the Spatial Transformer layer raises an error saying operations can't be performed with a None value.
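To illustrate what I mean (a toy check, separate from my network), the symbolic batch dimension shows up as None at graph-construction time:

import tensorflow as tf

x = tf.keras.Input((28, 28, 1))
print(x.shape)  # (None, 28, 28, 1) -- the batch dimension is None at build time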
My code is shown below:
class SpatialTransformer(Layer):
    def __init__(self):
        super(SpatialTransformer, self).__init__()

    def affine_transform(self, input_shape, theta):
        N = theta.shape[0]
        H, W = input_shape  # output dimensions of grid
        x_t, y_t = tf.meshgrid(tf.linspace(-1, 1, W), tf.linspace(-1, 1, H))
        x_t = tf.cast(tf.reshape(x_t, [-1]), dtype=tf.float32)
        y_t = tf.cast(tf.reshape(y_t, [-1]), dtype=tf.float32)
        ones = tf.ones(x_t.shape, dtype=tf.float32)
        sampling_grids = tf.stack([x_t, y_t, ones])
        sampling_grids = tf.expand_dims(sampling_grids, axis=0)
        sampling_grids = tf.tile(sampling_grids, tf.stack([N, 1, 1]))
        batch_grids = tf.matmul(theta, sampling_grids)
        batch_grids = tf.reshape(batch_grids, [N, 2, H, W])
        return batch_grids

    def get_pixel_value(self, feature_map, x_s, y_s):
        "Util function to get the value of a pixel from 4-D image tensors given position vectors x_s and y_s"
        N, H, W = x_s.shape
        batch_idx = tf.range(0, N)
        batch_idx = tf.reshape(batch_idx, (N, 1, 1))
        b = tf.tile(batch_idx, (1, H, W))
        indices = tf.stack([b, y_s, x_s], 3)  # creating indices of shape (N, H, W)
        return tf.gather_nd(feature_map, indices)  # extracting values corresponding to those indices

    def bilinear_sampler(self, feature_map, x, y):
        N, H, W, C = feature_map.shape
        max_y = tf.cast(H - 1, dtype=tf.int32)
        max_x = tf.cast(W - 1, dtype=tf.int32)
        zero = tf.zeros([], dtype=tf.int32)
        x = tf.cast(x, dtype=tf.float32)
        y = tf.cast(y, dtype=tf.float32)
        # rescaling the batch grid from [-1, 1] to [0, W-1] and [0, H-1]
        x = (x + 1.0) * tf.cast(max_x, dtype=tf.float32) / 2.0
        y = (y + 1.0) * tf.cast(max_y, dtype=tf.float32) / 2.0
        # taking the 4 nearest points to (x_i, y_i) to perform interpolation
        x0 = tf.cast(tf.floor(x), dtype=tf.int32)
        x1 = x0 + 1
        y0 = tf.cast(tf.floor(y), dtype=tf.int32)
        y1 = y0 + 1
        # clipping the values to be between [0, W-1] or [0, H-1]
        x0 = tf.clip_by_value(x0, zero, max_x)
        x1 = tf.clip_by_value(x1, zero, max_x)
        y0 = tf.clip_by_value(y0, zero, max_y)
        y1 = tf.clip_by_value(y1, zero, max_y)
        # getting pixel values of the corner coordinates (x0,y0), (x0,y1), (x1,y0), (x1,y1)
        Ia = self.get_pixel_value(feature_map, x0, y0)
        Ib = self.get_pixel_value(feature_map, x0, y1)
        Ic = self.get_pixel_value(feature_map, x1, y0)
        Id = self.get_pixel_value(feature_map, x1, y1)
        # changing the data type to float32
        x0 = tf.cast(x0, dtype=tf.float32)
        x1 = tf.cast(x1, dtype=tf.float32)
        y0 = tf.cast(y0, dtype=tf.float32)
        y1 = tf.cast(y1, dtype=tf.float32)
        # calculating delta (or simply area) weights for interpolation
        Wa = tf.expand_dims((x1 - x) * (y1 - y), axis=3)
        Wb = tf.expand_dims((x1 - x) * (y - y0), axis=3)
        Wc = tf.expand_dims((x - x0) * (y1 - y), axis=3)
        Wd = tf.expand_dims((x - x0) * (y - y0), axis=3)
        out = tf.add_n([Wa * Ia, Wb * Ib, Wc * Ic, Wd * Id])
        return out

    def call(self, feature_map, theta, out_size=None):
        N, H, W, _ = feature_map.shape
        if out_size:
            out_H = out_size[0]
            out_W = out_size[1]
            batch_grids = self.affine_transform([out_H, out_W], theta)
        else:
            batch_grids = self.affine_transform([H, W], theta)
        x_s = batch_grids[:, 0, :, :]
        y_s = batch_grids[:, 0, :, :]
        output_feature_map = self.bilinear_sampler(feature_map, x_s, y_s)
        return output_feature_map

class Localisation_Network(Layer):
    def __init__(self):
        super(Localisation_Network, self).__init__()
        self.conv = Conv2D(4, (3, 3), padding="valid", strides=2, activation="relu", kernel_initializer="he_normal")
        self.flatten = Flatten()
        self.dense_1 = Dense(64, activation="relu", kernel_initializer="he_normal")
        self.dense_2 = Dense(6, activation="linear")
        self.reshape = Reshape((2, 3))

    def call(self, input_tensor):
        x = self.conv(input_tensor)
        x = self.flatten(x)
        x = self.dense_1(x)
        x = self.dense_2(x)
        x = self.reshape(x)
        return x

def get_model():
    x_input = Input((28, 28, 1))
    u = Conv2D(16, (3, 3), padding="same", activation="relu", kernel_initializer="he_normal")(x_input)
    u = Conv2D(16, (3, 3), padding="same", strides=2, activation="relu", kernel_initializer="he_normal")(u)
    theta = Localisation_Network()(u)
    v = SpatialTransformer()(u, theta)
    v = Conv2D(32, (3, 3), padding="same", activation="relu", kernel_initializer="he_normal")(v)
    x = Conv2D(32, (3, 3), padding="same", strides=2, activation="relu", kernel_initializer="he_normal")(v)
    x = GlobalAveragePooling2D()(x)
    x = Flatten()(x)
    x = Dense(10, activation="softmax")(x)
    model = Model(inputs=x_input, outputs=x)
    return model
Error of the above code:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-47-d630585afd1d> in <module>()
4 u = Conv2D(16, (3, 3), padding = "same", strides = 2, activation="relu", kernel_initializer="he_normal")(u)
5 theta = Localisation_Network()(u)
----> 6 v = SpatialTransformer()(u, theta)
7 v = Conv2D(32, (3, 3), padding = "same", activation= "relu", kernel_initializer="he_normal")(v)
8 x = Conv2D(32, (3, 3), padding = "same", strides = 2, activation= "relu", kernel_initializer="he_normal")(v)
4 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/autograph/impl/api.py in wrapper(*args, **kwargs)
668 except Exception as e: # pylint:disable=broad-except
669 if hasattr(e, 'ag_error_metadata'):
--> 670 raise e.ag_error_metadata.to_exception(e)
671 else:
672 raise
ValueError: in user code:
<ipython-input-7-910b0adb6eb7>:83 call *
batch_grids = self.affine_transform([H, W], theta)
<ipython-input-45-eb5ac5f8f722>:14 affine_transform *
sampling_grids = tf.tile(sampling_grids, tf.stack([N, 1, 1]))
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper **
return target(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/array_ops.py:1405 stack
value_shape = ops.convert_to_tensor(values[0], name=name)._shape_tuple() # pylint: disable=protected-access
/usr/local/lib/python3.6/dist-packages/tensorflow/python/profiler/trace.py:163 wrapped
return func(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py:1540 convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/constant_op.py:339 _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/constant_op.py:265 constant
allow_broadcast=True)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/constant_op.py:283 _constant_impl
allow_broadcast=allow_broadcast))
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/tensor_util.py:445 make_tensor_proto
raise ValueError("None values not supported.")
ValueError: None values not supported.
It is hard to tell from here, but based on the stack trace it seems this line is the problem: sampling_grids = tf.tile(sampling_grids, tf.stack([N, 1, 1])) (it forwards None where it is not expected).
The second thing I noticed: I'm not sure your call override in SpatialTransformer should actually have three params, def call(self, feature_map, theta, out_size=None)?
Since it inherits from Layer, it seems it should have an input_tensor param only.
I'm also not sure whether you need to override build for your use case and perhaps do the required initializations there.
Other than that, you can log extensively (add print statements) and see where exactly the None value enters.
Finally, you can also upload an excerpt of your code sufficient to reproduce the same error; that could perhaps bring more help.
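For what it's worth, a common workaround for the None batch dimension (a sketch under my assumptions, not tested against your full model) is to read the batch size at runtime with tf.shape instead of the static .shape attribute, which is what returns None inside a functional model:

import tensorflow as tf

def affine_transform_dynamic(input_shape, theta):
    # tf.shape(theta)[0] is a runtime tensor, so the batch size is resolved
    # at execution time instead of being the static None
    N = tf.shape(theta)[0]
    H, W = input_shape
    x_t, y_t = tf.meshgrid(tf.linspace(-1.0, 1.0, W), tf.linspace(-1.0, 1.0, H))
    x_t = tf.reshape(tf.cast(x_t, tf.float32), [-1])
    y_t = tf.reshape(tf.cast(y_t, tf.float32), [-1])
    ones = tf.ones_like(x_t)
    sampling_grids = tf.stack([x_t, y_t, ones])           # (3, H*W)
    sampling_grids = tf.expand_dims(sampling_grids, 0)    # (1, 3, H*W)
    sampling_grids = tf.tile(sampling_grids, tf.stack([N, 1, 1]))
    batch_grids = tf.matmul(theta, sampling_grids)        # (N, 2, H*W)
    return tf.reshape(batch_grids, [-1, 2, H, W])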
I have removed the tf.tile call, since the vectorized output of the localisation network, with dimension (None, 2, 3), will do the vectorization trick during the tf.matmul operation. I have also replaced every tf.reshape operation with the predefined Keras reshape layer tf.keras.layers.Reshape(), as it maintains the vectorization.
class SpatialTransformer(Layer):
    def __init__(self, out_size, name="spatial_transformer"):
        super(SpatialTransformer, self).__init__()
        self.out_size = out_size
        self.reshape_1 = Reshape([2, self.out_size[0], self.out_size[1]])  # replacing all reshapes with the vectorized form
        self.reshape_2 = Reshape([self.out_size[0], self.out_size[1]])
        self.reshape_3 = Reshape([1, 1])
        self.reshape_4 = Reshape([])

    def affine_transform(self, input_shape, theta):
        N = theta.shape[0]
        H, W = input_shape  # output dimensions of grid
        x_t, y_t = tf.meshgrid(tf.linspace(-1, 1, W), tf.linspace(-1, 1, H))
        x_t = tf.cast(tf.reshape(x_t, [-1]), dtype=tf.float32)
        y_t = tf.cast(tf.reshape(y_t, [-1]), dtype=tf.float32)
        ones = tf.ones(x_t.shape, dtype=tf.float32)
        sampling_grids = tf.stack([x_t, y_t, ones])
        # sampling_grids = tf.tile(sampling_grids, tf.stack([N, 1, 1]))
        batch_grids = tf.matmul(theta, sampling_grids)
        batch_grids = self.reshape_1(batch_grids)
        return batch_grids

    def get_pixel_value(self, feature_map, x_s, y_s):
        "Util function to get the value of a pixel from 4-D image tensors given position vectors x_s and y_s"
        N, H, W = x_s.shape
        batch_idx = tf.range(0, N)
        batch_idx = self.reshape_3(batch_idx)
        b = tf.tile(batch_idx, (1, H, W))
        indices = tf.stack([b, y_s, x_s], 3)  # creating indices of shape (N, H, W)
        return tf.gather_nd(feature_map, indices)  # extracting values corresponding to those indices

    def bilinear_sampler(self, feature_map, x, y):
        N, H, W, _ = feature_map.shape
        max_y = tf.cast(H - 1, dtype=tf.int32)
        max_x = tf.cast(W - 1, dtype=tf.int32)
        zero = tf.zeros([], dtype=tf.int32)
        x = tf.cast(x, dtype=tf.float32)
        y = tf.cast(y, dtype=tf.float32)
        # rescaling the batch grid from [-1, 1] to [0, W-1] and [0, H-1]
        x = (x + 1.0) * tf.cast(max_x, dtype=tf.float32) / 2.0
        y = (y + 1.0) * tf.cast(max_y, dtype=tf.float32) / 2.0
        # taking the 4 nearest points to (x_i, y_i) to perform interpolation
        x0 = tf.cast(tf.floor(x), dtype=tf.int32)
        x1 = x0 + 1
        y0 = tf.cast(tf.floor(y), dtype=tf.int32)
        y1 = y0 + 1
        # clipping the values to be between [0, W-1] or [0, H-1]
        x0 = tf.clip_by_value(x0, zero, max_x)
        x1 = tf.clip_by_value(x1, zero, max_x)
        y0 = tf.clip_by_value(y0, zero, max_y)
        y1 = tf.clip_by_value(y1, zero, max_y)
        # getting pixel values of the corner coordinates (x0,y0), (x0,y1), (x1,y0), (x1,y1)
        Ia = self.get_pixel_value(feature_map, x0, y0)
        Ib = self.get_pixel_value(feature_map, x0, y1)
        Ic = self.get_pixel_value(feature_map, x1, y0)
        Id = self.get_pixel_value(feature_map, x1, y1)
        # print(f"Ia: {Ia}")
        # changing the data type to float32
        x0 = tf.cast(x0, dtype=tf.float32)
        x1 = tf.cast(x1, dtype=tf.float32)
        y0 = tf.cast(y0, dtype=tf.float32)
        y1 = tf.cast(y1, dtype=tf.float32)
        # calculating delta (or simply area) weights for interpolation
        Wa = tf.expand_dims((x1 - x) * (y1 - y), axis=3)
        Wb = tf.expand_dims((x1 - x) * (y - y0), axis=3)
        Wc = tf.expand_dims((x - x0) * (y1 - y), axis=3)
        Wd = tf.expand_dims((x - x0) * (y - y0), axis=3)
        out = tf.add_n([Wa * Ia, Wb * Ib, Wc * Ic, Wd * Id])
        return out

    def call(self, input_tensor):
        feature_map, theta = input_tensor
        N, H, W, _ = feature_map.shape
        if self.out_size:
            out_H = self.out_size[0]
            out_W = self.out_size[1]
            batch_grids = self.affine_transform([out_H, out_W], theta)
        else:
            batch_grids = self.affine_transform([H, W], theta)
        x_s = self.reshape_2(batch_grids[:, 0, :, :])
        y_s = self.reshape_2(batch_grids[:, 1, :, :])
        output_feature_map = self.bilinear_sampler(feature_map, x_s, y_s)
        return output_feature_map

class Localisation_Network(Layer):
    def __init__(self):
        super(Localisation_Network, self).__init__()
        self.conv_1 = Conv2D(16, (3, 3), padding="same", strides=1, activation="relu", kernel_initializer="he_normal")
        self.conv_2 = Conv2D(32, (3, 3), padding="same", strides=1, activation="relu", kernel_initializer="he_normal")
        self.flatten = Flatten()
        self.dense_1 = Dense(32, activation="relu", kernel_initializer="he_normal")

        def bias_init(shape, dtype=None):
            identitiy = tf.Variable([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])
            identitiy = tf.reshape(identitiy, -1)
            return identitiy

        self.dense_2 = Dense(6, kernel_initializer="zeros", bias_initializer=bias_init)
        self.reshape = Reshape((2, 3))

    def call(self, input_tensor):
        x = self.conv_1(input_tensor)
        x = self.conv_2(x)
        x = tf.reduce_mean(x, axis=[1, 2])
        x = self.dense_1(x)
        x = self.dense_2(x)
        x = self.reshape(x)
        return x

def transformer_model_2():
    x_input = Input((28, 28, 1))
    theta = Localisation_Network()(x_input)
    x = SpatialTransformer(x_input.shape[1:3], name="transformer_output")([x_input, theta])
    x = Conv2D(16, (3, 3), padding="same", activation="relu", kernel_initializer="he_normal")(x)
    x = Conv2D(16, (3, 3), padding="same", strides=2, activation="relu", kernel_initializer="he_normal")(x)
    x = Conv2D(32, (3, 3), padding="same", activation="relu", kernel_initializer="he_normal")(x)
    x = Conv2D(32, (3, 3), padding="same", strides=2, activation="relu", kernel_initializer="he_normal")(x)
    x = GlobalAveragePooling2D()(x)
    x = Flatten()(x)
    x = Dense(10, activation="softmax")(x)
    return Model(inputs=x_input, outputs=x)
The only thing I am stuck on is the localization network: since it is a regression network, a linear activation is used, but its output values become large and are clipped later in bilinear sampling, which ultimately results in zero output, so gradients cannot flow through the localization network.
I have looked at Medium posts and GitHub to find a solution; many of them suggested initializing the weights to zeros and the biases to the identity [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]] in the last layer of the localization network, but it's not working.

I get an error when I load a TensorFlow 2.0 model

I am training a simple model to perform a linear regression, and then I save the model:
import math
import numpy as np
import tensorflow as tf

class NN(tf.keras.Model):
    def __init__(self):
        super(NN, self).__init__()
        L = 20
        self.W1 = tf.Variable(tf.random.truncated_normal([1, L], stddev=math.sqrt(3)))
        self.B1 = tf.Variable(tf.random.truncated_normal([1, L], stddev=1.0))
        self.W2 = tf.Variable(tf.random.truncated_normal([L, 1], stddev=math.sqrt(3/L)))
        self.B2 = tf.Variable(tf.zeros([1]))

    def call(self, inputs):
        Z1 = tf.matmul(inputs, self.W1) + self.B1
        Y1 = tf.nn.tanh(Z1)
        Y = tf.matmul(Y1, self.W2) + self.B2
        return Y

# The loss function to be optimized
def loss(model, X, Y_):
    error = model(X) - Y_
    return tf.reduce_mean(tf.square(error))

model = NN()
optimizer = tf.optimizers.Adam(learning_rate=0.001)
bsize = 20

# You can call this function in a loop to train the model, bsize samples at a time
def training_step(i):
    # read data
    x_batch, y_batch = func.next_batch(bsize)
    x_batch = np.reshape(x_batch, (bsize, 1))
    y_batch = np.reshape(y_batch, (bsize, 1))
    # compute training values
    loss_fn = lambda: loss(model, x_batch, y_batch)
    optimizer.minimize(loss_fn, [model.W1, model.B1, model.W2, model.B2])
    if i % 5000 == 0:
        l = loss(model, x_batch, y_batch)
        print(str(i) + ": epoch: " + str(func._epochs_completed) + ": loss: " + str(l.numpy()))

for i in range(50001):
    training_step(i)

# save the model
tf.saved_model.save(model, "my_file")
and then I try to load the model with the following lines, following the TensorFlow documentation:
model = tf.saved_model.load("my_file")
f = model.signatures["serving_default"]
y = f(x)
However I get the following error message:
f = model.signatures["serving_default"]
File "my_file/signature_serialization.py", line 195, in __getitem__
return self._signatures[key]
KeyError: 'serving_default'
What is wrong? Why is serving_default not defined?
I solved the problem by adding a third argument to the tf.saved_model.save function:
tf.saved_model.save(model, "myfile", signatures=model.call.get_concrete_function(tf.TensorSpec(shape=[None, 1], dtype=tf.float32, name="inp")))
and by adding the @tf.function decorator above the call method:
class NN(tf.keras.Model):
    def __init__(self):
        super(NN, self).__init__()
        L = 20
        self.W1 = tf.Variable(tf.random.truncated_normal([1, L], stddev=math.sqrt(3)))
        self.B1 = tf.Variable(tf.random.truncated_normal([1, L], stddev=1.0))
        self.W2 = tf.Variable(tf.random.truncated_normal([L, 1], stddev=math.sqrt(3/L)))
        self.B2 = tf.Variable(tf.zeros([1]))

    @tf.function
    def call(self, X):
        Z1 = tf.matmul(X, self.W1) + self.B1
        Y1 = tf.nn.tanh(Z1)
        Y = tf.matmul(Y1, self.W2) + self.B2
        return Y
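With that in place, loading and calling the signature should look roughly like this (a sketch: signature functions take keyword arguments named after the TensorSpec, here "inp", and return a dict of output tensors whose keys you can check via f.structured_outputs):

import tensorflow as tf

loaded = tf.saved_model.load("myfile")
f = loaded.signatures["serving_default"]
# the input keyword matches the TensorSpec name given at save time
out = f(inp=tf.constant([[0.5]], dtype=tf.float32))
print(out)  # dict of output tensors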

ValueError: Cannot feed value of shape (128, 28, 28) for Tensor 'Placeholder:0', which has shape '(?, 784)'

I am new to TensorFlow and machine learning, and I am trying out a CNN using TensorFlow with my custom input data. But I am getting the error attached below.
The data (image) size is 28x28, with 15 labels.
I don't understand the numpy reshape part of this script, or the error.
Help is highly appreciated.
import tensorflow as tf
import os
import skimage.data
import numpy as np
import random

def load_data(data_directory):
    directories = [d for d in os.listdir(data_directory)
                   if os.path.isdir(os.path.join(data_directory, d))]
    labels = []
    images = []
    for d in directories:
        label_directory = os.path.join(data_directory, d)
        file_names = [os.path.join(label_directory, f)
                      for f in os.listdir(label_directory)
                      if f.endswith(".jpg")]
        for f in file_names:
            images.append(skimage.data.imread(f))
            labels.append(d)
        print(str(d) + ' Completed')
    return images, labels

ROOT_PATH = "H:\\Testing\\TrainingData"
train_data_directory = os.path.join(ROOT_PATH, "Training")
test_data_directory = os.path.join(ROOT_PATH, "Testing")

print('Loading Data...')
images, labels = load_data(train_data_directory)
print('Data has been Loaded')

n_classes = 15
training_examples = 10500
test_examples = 4500
batch_size = 128

x = tf.placeholder('float', [None, 784])
y = tf.placeholder('float')

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def maxpool2d(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

def neural_network_model(x):
    weights = {'W_Conv1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
               'W_Conv2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
               'W_FC': tf.Variable(tf.random_normal([7*7*64, 1024])),
               'Output': tf.Variable(tf.random_normal([1024, n_classes]))}
    biases = {'B_Conv1': tf.Variable(tf.random_normal([32])),
              'B_Conv2': tf.Variable(tf.random_normal([64])),
              'B_FC': tf.Variable(tf.random_normal([1024])),
              'Output': tf.Variable(tf.random_normal([n_classes]))}
    x = tf.reshape(x, shape=[-1, 28, 28, 1])
    conv1 = conv2d(x, weights['W_Conv1'])
    conv1 = maxpool2d(conv1)
    conv2 = conv2d(conv1, weights['W_Conv2'])
    conv2 = maxpool2d(conv2)
    fc = tf.reshape(conv2, [-1, 7*7*64])
    fc = tf.nn.relu(tf.matmul(fc, weights['W_FC']) + biases['B_FC'])
    output = tf.matmul(fc, weights['Output']) + biases['Output']
    return output

def next_batch(num, data, labels):
    idx = np.arange(0, len(data))
    np.random.shuffle(idx)
    idx = idx[:num]
    data_shuffle = [data[i] for i in idx]
    labels_shuffle = [labels[i] for i in idx]
    return np.asarray(data_shuffle), np.asarray(labels_shuffle)

def train_neural_network(x):
    prediction = neural_network_model(x)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    hm_epochs = 10
    with tf.Session() as sess:
        # OLD:
        # sess.run(tf.initialize_all_variables())
        # NEW:
        sess.run(tf.global_variables_initializer())
        for epoch in range(hm_epochs):
            epoch_loss = 0
            for _ in range(int(training_examples/batch_size)):
                epoch_x, epoch_y = next_batch(batch_size, images, labels)
                _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
                epoch_loss += c
            print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:', epoch_loss)
        correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        print('Accuracy:', accuracy.eval({x: images, y: labels}))

print('Training Neural Network...')
train_neural_network(x)
What am I doing wrong? What needs to be fixed, and how do I fix the shape of the numpy array?
If you look closely, you'll see that you have two x placeholders:
x = tf.placeholder('float', [None, 784]) # global
...
x = tf.reshape(x, shape=[-1,28,28,1]) # in neural_network_model
One of them is in the function scope, hence not visible in train_neural_network, so TensorFlow takes the one with the [?, 784] shape. You should get rid of one of them.
Also note that your training data has rank 3, i.e. [batch_size, 28, 28], so it's not directly compatible with either of those placeholders.
To feed it into the first x, take epoch_x.reshape([-1, 784]). For the second placeholder (once you make it visible), take epoch_x.reshape([-1, 28, 28, 1]).
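Concretely, a minimal sketch of the feeding-side fix (assuming you keep the global [None, 784] placeholder and let neural_network_model do the 4-D reshape internally):

# inside the training loop, flatten (batch, 28, 28) -> (batch, 784)
epoch_x, epoch_y = next_batch(batch_size, images, labels)
epoch_x = np.asarray(epoch_x).reshape([-1, 784])
_, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})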