different results in inference between python and c++ opencv Mat:: - tensorflow

i'm doing a re identification network, implementing a triplet-loss function, at that point everything is fine. the networks works fine in python, I implemented the network on keras with tensorflow as backend, I passed the .hd5 to a .pb file to make inference in tensorflow c++, the probmes is that with the same images the result is difference between python and c++ and I don't know why anyone to help me?
here is the the model in python:
import keras
import keras.applications
import keras.layers as layer
import tensorflow as tf
from keras import backend as K
from keras.backend.tensorflow_backend import set_session
from keras.models import Model as md
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.log_device_placement = True
sess = tf.Session(config=config)
set_session(sess)
class Model:
def init(self, shape):
self.shape = shape
self.params = {
'optimizer': 'sgd',
'first_neuron': 12,
'first_max_pooling': 2,
'second_neuron': 12,
'second_max_pooling': 2,
'third_neuron': 20,
'third_max_pooling': 3,
'dense_neuron': 64,
'final_neuron': 128,
}
self.feature_model = self.create_features_model()
self.triplet_model = self.create_model()
def create_features_model(self):
# Define the vision modules
img_input = layer.Input(shape=(self.shape))
x = layer.Conv2D(self.params['first_neuron'], (3, 3), activation='relu')(img_input)
x = layer.MaxPooling2D((self.params['first_max_pooling'], self.params['first_max_pooling']))(x)
x = layer.Conv2D(self.params['second_neuron'], (3, 3), activation='relu')(x)
x = layer.MaxPooling2D((self.params['second_max_pooling'], self.params['second_max_pooling']))(x)
x = layer.Conv2D(self.params['third_neuron'], (3, 3), activation='relu')(x)
x = layer.MaxPooling2D((self.params['third_max_pooling'], self.params['third_max_pooling']))(x)
x = layer.Flatten()(x)
x = layer.Dense(self.params['dense_neuron'], activation='relu')(x)
x = layer.Dense(self.params['final_neuron'], activation='relu')(x)
out = layer.Lambda(lambda x: K.l2_normalize(x, axis=1), name='t_emb_1_lnorm')(x)
features_model = md(img_input, out)
features_model.summary()
return features_model
def create_model(self):
base_model = self.feature_model
# triplet framework, shared weights
input_shape = (self.shape)
input_target = layer.Input(shape=input_shape, name='input_target')
input_positive = layer.Input(shape=input_shape, name='input_pos')
input_negative = layer.Input(shape=input_shape, name='input_neg')
net_target = base_model(input_target)
net_positive = base_model(input_positive)
net_negative = base_model(input_negative)
# The Lamda layer produces output using given function. Here its Euclidean distance.
positive_distance = layer.Lambda(self.euclidean_distance, name='pos_dist')([net_target, net_positive])
negative_distance = layer.Lambda(self.euclidean_distance, name='neg_dist')([net_target, net_negative])
diference = layer.Lambda(self.euclidean_distance, name='dif')([net_positive, net_negative])
# This lambda layer simply stacks outputs so both distances are available to the objective
distances = layer.Lambda(lambda vects: K.stack(vects, axis=1), name='distance')(
[positive_distance, negative_distance, diference])
model = md([input_target, input_positive, input_negative], distances, name='result')
# Setting up optimizer designed for variable learning rate
model.compile(optimizer=keras.optimizers.Adam(lr=0.001, decay=0.00002),
loss=self.triplet_loss, metrics=[self.accuracy])
return model
def triplet_loss(self, _, y_pred):
margin = K.constant(0.5)
return K.mean(K.maximum(K.constant(0), K.square(y_pred[:, 0, 0]) - 0.5 * (
K.square(y_pred[:, 1, 0]) + K.square(y_pred[:, 2, 0])) + margin))
def accuracy(self, _, y_pred):
return K.mean(y_pred[:, 0, 0] < y_pred[:, 1, 0])
def lnorm(self, x):
return K.l2_normalize(x, axis=-1)
def euclidean_distance(self, vects):
x, y = vects
return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon()))
this is how I made inference on python:
from model import Model as model
from keras.utils import HDF5Matrix
import numpy as np
import cv2
from keras.backend.tensorflow_backend import set_session
import tensorflow as tf
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.log_device_placement = True
sess = tf.Session(config=config)
set_session(sess)
def load_datasets(in_h5_path, partition='train'):
if partition == 'train':
target = HDF5Matrix(datapath=in_h5_path, dataset="targets")
positive = HDF5Matrix(datapath=in_h5_path, dataset="positives")
negative = HDF5Matrix(datapath=in_h5_path, dataset="negatives")
return target, positive, negative
else:
print("Invalid 'partition' parameter: Valid values: ['train', 'test']")
tar = cv2.imread("/home/amejia/PycharmProjects/triplet_loss/tra1.png")
nega = cv2.imread("/home/amejia/PycharmProjects/triplet_loss/dec1.png")
tar = cv2.resize(tar, (32, 32), interpolation=cv2.INTER_CUBIC)
nega = cv2.resize(nega, (32, 32), interpolation=cv2.INTER_CUBIC)
t1 = np.array(tar).reshape((1, 32, 32, 3))
t2 = np.array(nega).reshape((1, 32, 32, 3))
target, positive, negative = load_datasets('/home/amejia/PycharmProjects/lossDatasetGenerator/test/test32.h5')
net = model((32, 32, 3))
net.triplet_model.load_weights("/home/amejia/PycharmProjects/triplet_loss/simple-grande.hdf5")
enter = [t1, t2, t1]
a = net.triplet_model.predict(x=enter, batch_size=1)
print(a)
the inference in c++ :
in c++ this si how I made inference:
tensorflow::Tensor target(tensorflow::DT_FLOAT,
tensorflow::TensorShape(
{1, image_size, image_size, 3}));
tensorflow::Tensor positive(tensorflow::DT_FLOAT,
tensorflow::TensorShape(
{1, image_size, image_size, 3}));
img_to_float2(tracks, detections, target, positive, frame);
std::vector<std::pair<std::string, tensorflow::Tensor>> Input = {{"input_target:0", target},
{"input_pos:0", positive},
{"input_neg:0", target}};
std::vector<tensorflow::Tensor> Outputs;
tensorflow::Status Status = session->Run(Input, {"distance/stack:0"}, {}, &Outputs);
auto data = Outputs[0].flat<float>();
std::cout << Outputs[0].DebugString() << std::endl;
and this is the function to put create the in tensor:
void LossModel::img_to_float2(Track &tracks, Detection &detections, tensorflow::Tensor &tracksTensor,
tensorflow::Tensor &detectionsTensor, cv::Mat &frame) {
auto *tar = tracksTensor.flat<float>().data();
auto *dec = detectionsTensor.flat<float>().data();
cv::Mat detectionImg = frame(detections.getBox()).clone();
resize(detectionImg, detectionImg, cv::Size(FEATURES_IMG_SIZE, FEATURES_IMG_SIZE), 0, 0,
cv::INTER_CUBIC);
cv::Mat resizedImage(FEATURES_IMG_SIZE, FEATURES_IMG_SIZE, CV_32FC3, dec);
detectionImg.convertTo(resizedImage, CV_32FC3);
cv::Mat trackImg = tracks.get_img().clone();
resize(trackImg, trackImg, cv::Size(FEATURES_IMG_SIZE, FEATURES_IMG_SIZE), 0, 0,
cv::INTER_CUBIC);
cv::Mat resizedImage2(FEATURES_IMG_SIZE, FEATURES_IMG_SIZE, CV_32FC3, tar);
trackImg.convertTo(resizedImage2, CV_32FC3);

Related

Trouble Training Same Tensorflow Model in PyTorch

I have trained a model in Tensorflow and am having trouble replicating it in PyTorch. The Tensorflow model achieves near 100% accuracy (the task is simple), but the PyTorch model performs at random. I've spent a while trying to figure this out, and can't understand what the problem could be.
The model is trained for the task of binary classification. Given an input utterance describing a quadrant and a (x, y, z) coordinate, the model has to predict if the (x, z) portion of the coordinate is in the quadrant described. For example, if the input text was "quadrant 1" and the coordinate was (0.5, -, 0.5), then the prediction should be true, but if the region was "quadrant 2" with the same coordinate, then the prediction should be false.
I generated some data and trained the model in Tensorflow using this code:
x_data_placeholder = tf.placeholder(tf.float32, [FLAGS.batch_size, 1], name="x_data")
y_data_placeholder = tf.placeholder(tf.float32, [FLAGS.batch_size, 1], name="y_data")
z_data_placeholder = tf.placeholder(tf.float32, [FLAGS.batch_size, 1], name="z_data")
# text and labels placeholders
text_data = tf.placeholder(tf.int32, [FLAGS.batch_size, maxtextlength])
text_lengths = tf.placeholder(tf.int32, [FLAGS.batch_size])
y_labels_placeholder = tf.placeholder(tf.int64, [FLAGS.batch_size])
# encode text and coordinate
embeddings = tf.Variable(tf.random_uniform([100, embedding_size], -1, -1))
rnn_inputs = tf.nn.embedding_lookup(embeddings, text_data)
rnn_layers = [tf.compat.v1.nn.rnn_cell.LSTMCell(size, initializer=tf.compat.v1.keras.initializers.glorot_normal) for size in [256]]
multi_rnn_cell = tf.compat.v1.nn.rnn_cell.MultiRNNCell(rnn_layers, state_is_tuple=True)
text_outputs, text_fstate = tf.compat.v1.nn.dynamic_rnn(cell=multi_rnn_cell,
inputs=rnn_inputs,
dtype=tf.float32, sequence_length=text_lengths)
# have fully connected layers to map them the input coordinates into the same dimension as the LSTM output layer from above
x_output_layer = tf.compat.v1.layers.dense(x_data_placeholder, units=FLAGS.fc_column_size, activation=tf.nn.relu, name='x_coordinate')
y_output_layer = tf.compat.v1.layers.dense(y_data_placeholder, units=FLAGS.fc_column_size, activation=tf.nn.relu, name='y_coordinate')
z_output_layer = tf.compat.v1.layers.dense(z_data_placeholder, units=FLAGS.fc_column_size, activation=tf.nn.relu, name='z_coordinate')
# add the representations
total_output_layer = x_output_layer + y_output_layer + z_output_layer + lstm_output_layer
# make the predictions with two fully connected layers
fc_1 = tf.compat.v1.layers.dense(total_output_layer, units=FLAGS.hidden_layer_size, activation=tf.nn.relu, name='fc_1')
logits = tf.compat.v1.layers.dense(fc_1, units=FLAGS.output_dims, activation=None, name='logits')
# train the model
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_labels_placeholder, logits=logits))
optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate, epsilon=1e-7)
gradients, variables = zip(*optimizer.compute_gradients(loss))
gradients, _ = tf.clip_by_global_norm(gradients, FLAGS.gradient_clip_threshold)
optimize = optimizer.apply_gradients(zip(gradients, variables))
# then it'll be trained with sess.run ...
Now for the PyTorch replication:
class BaselineModel(nn.Module):
def __init__(self):
super(BaselineModel, self).__init__()
self.encode_x = nn.Linear(1, embed_size)
self.encode_y = nn.Linear(1, embed_size)
self.encode_z = nn.Linear(1, embed_size)
self._embeddings = nn.Embedding(vocab_size, self.embedding_table_size)
nn.init.uniform_(self._embeddings.weight, -1.0, 1.0)
self.num_layers = 1
self.rnn = nn.LSTM(self.embedding_table_size, self.hidden_size, batch_first=True)
self.fc_after_text_lstm = nn.Linear(self.hidden_size, 100)
self.fc = nn.Linear(100, 256)
self.fc_final = nn.Linear(256, 2)
self.relu_activation = nn.ReLU()
self.softmax = nn.Softmax(dim=1)
def init_hidden(self, batch_size, device='cuda:0'):
# for LSTM, we need # of layers
h_0 = torch.zeros(1, batch_size, self.hidden_size).to(device)
c_0 = torch.zeros(1, batch_size, self.hidden_size).to(device)
return h_0, c_0
def forward(self, input_text, x_coordinate=None, y_coordinate=None, z_coordinate=None):
x_embed = self.relu_activation(self.encode_x(x_coordinate.cuda().to(torch.float32)).cuda())
y_embed = self.relu_activation(self.encode_y(y_coordinate.cuda().to(torch.float32))).cuda()
z_embed = self.relu_activation(self.encode_z(z_coordinate.cuda().to(torch.float32))).cuda()
embeds = self._embeddings(input_text)
embedding, hidden = self.rnn(embeds, self.hidden)
text_fc = self.relu_activation(self.fc_after_text_lstm(embedding[:, -1]))
representations_so_far_added = torch.sum(torch.stack([text_fc, x_embed, y_embed, z_embed]), dim=0)
pre_final_embedding = self.relu_activation(self.fc(representations_so_far_added))
return self.fc_final(pre_final_embedding )
### training code
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, eps=1e-7)
criterion = nn.CrossEntropyLoss()
for input_text, x_coordinate, y_coordinate, z_coordinate, targets, train_data:
optimizer.zero_grad()
pred = model(input_text, x_coordinate=x_coordinate, y_coordinate=y_coordinate, z_coordinate=z_coordinate)
loss = criterion(pred.float(), targets)
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
optimizer.step()
scheduler.step()
# accuracy evaluation code, this is evaluated over the entire epoch
pred_idx = F.log_softmax(pred, dim=1)
target_labels = targets.cpu().int()
pred_labels = torch.argmax(pred_idx, dim=-1).cpu().data.int()
curr_acc = skm.accuracy_score(target_labels, pred_labels)
If anyone can spot any issue with the PyTorch implementation or maybe tell me what could be wrong, that would be much appreciated! I also tried to load the weights of the Tensorflow model into all the appropriate layers, and performance still struggles in PyTorch! Thanks in advance!
EDIT:
I have created a minimally reproducible example, because I still cannot figure out what the problem is. Any help would be still appreciated!
import torch
import torch.nn as nn
import numpy as np
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
lr = 0.0005
n_epochs = 10
input_dim = 4
hidden_dim = 128
layer_dim = 2
output_dim = 2
batch_size = 50
class FeatureDataSet(torch.utils.data.Dataset):
def __init__(self, x_train, y_train, x_coordinates):
self.x_train = torch.tensor(x_train, dtype=torch.long)
self.y_train = torch.tensor(y_train)
self.x_coordinates = torch.tensor(x_coordinates, dtype=torch.float32)
def __len__(self):
return len(self.y_train)
def __getitem__(self, idx):
return self.x_train[idx], self.y_train[idx], self.x_coordinates[idx]
class RNN(nn.Module):
def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, batch_size):
super().__init__()
self.hidden_dim = hidden_dim
self.layer_dim = layer_dim
# linear layer to encode the coordinate
self.encode_x = nn.Linear(1, hidden_dim).cuda()
self._embeddings = nn.Embedding(40, 100).cuda()
# hidden_dim is 128
# layer_dim is 2
self.lstm = nn.LSTM(100, hidden_dim, layer_dim, batch_first=True).cuda()
self.fc = nn.Linear(2 * hidden_dim, output_dim).cuda()
self.batch_size = batch_size
self.hidden = None
def init_hidden(self, x):
h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
return [t.cpu() for t in (h0, c0)]
def forward(self, x, x_coordinate):
#initializing the hidden states
h0, c0 = self.init_hidden(x)
embeds = self._embeddings(x)
out, (hn, cn) = self.lstm(embeds.cuda(), (h0.cuda(), c0.cuda()))
x_embed = F.relu(self.encode_x(x_coordinate.cuda().to(torch.float32)).cuda())
representations_so_far_added = torch.cat([out[:, -1, :], x_embed], dim=1)
out = self.fc(representations_so_far_added)
return out
model = RNN(input_dim, hidden_dim, layer_dim, output_dim, batch_size)
criterion = nn.CrossEntropyLoss()
opt = torch.optim.Adam(model.parameters(), lr=0.001)
print('Start model training')
import sklearn.metrics as skm
import torch.nn.functional as F
x_train = []
x_coordinates = []
y_train = []
for i in range(10000):
# create the data. if x_coordinate > 0 and the sentence says that (represented by [1, 5, 6, 8]), then we should predict positive else negative (if the x_coordinate > 0)
# same applies if the x_coordinate < 0, just that the sentence is now [1, 5, 6, 9]
if np.random.randint(0, 2) == 0:
if np.random.randint(0, 2) == 0:
# x coordinate > 0
x_train.append([1, 5, 6, 8])
x_coordinates.append([round(np.random.uniform(0.01, 1.00, 1)[0], 2)])
y_train.append(1.0)
else:
# x coordinate > 0 negative
x_train.append([1, 5, 6, 8])
x_coordinates.append([round(np.random.uniform(-1.00, 0.00, 1)[0], 2)])
y_train.append(0.0)
else:
if np.random.randint(0, 2) == 0:
# x coordinate < 0
x_train.append([1, 5, 6, 9])
x_coordinates.append([round(np.random.uniform(-1.00, 0.00, 1)[0], 2)])
y_train.append(1.0)
else:
# x coordinate < 0 negative
x_train.append([1, 5, 6, 9])
x_coordinates.append([round(np.random.uniform(0.01, 1.00, 1)[0], 2)])
y_train.append(0.0)
# print a sample of data
print(x_train[:10])
print(y_train[:10])
print(x_coordinates[:10])
# create a dataloader
trainingDataset = FeatureDataSet(x_train=x_train, y_train=y_train, x_coordinates=x_coordinates)
train_loader = torch.utils.data.DataLoader(dataset=trainingDataset, batch_size=batch_size, shuffle=True)
# for each epoch
for epoch in range(1, n_epochs + 1):
acc_all = []
# each batch
for i, (x_batch, y_batch, x_coord_batch) in enumerate(train_loader):
x_batch = x_batch.to(device)
y_batch = y_batch.to(device)
x_coord_batch = x_coord_batch.to(device)
opt.zero_grad()
# pass in the text (x_batch) and coordinate (x_coord_batch)
out = model(x_batch, x_coordinate=x_coord_batch)
loss = criterion(out.float(), y_batch.type(torch.LongTensor).cuda())
loss.backward()
opt.step()
pred_idx = F.log_softmax(out, dim=1)
target_labels = y_batch.cpu().int()
pred_labels = torch.argmax(pred_idx, dim=-1).cpu().data.int()
curr_acc = skm.accuracy_score(target_labels, pred_labels)
acc_all.append(curr_acc)
print(np.mean(acc_all))
I suppose perhaps there are some mistakes in your dataset implementation in the PyTorch version.
I tried your pytorch BaselineModel on both the dataset in your "minimally reproducible example" and my own dataset constructed according to your description, and find that it works fine.
The following is my codes for testing on my own dataset. Note that I add several hyperparameters to the code of BaselineModel to make it run. I got accuracy over 99%.
import random
import torch
import torch.nn as nn
import numpy as np
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
lr = 0.0005
n_epochs = 100
input_dim = 4
hidden_dim = 128
layer_dim = 2
output_dim = 2
batch_size = 50
class AverageMeter(object):
"""Computes and stores the average and current value"""
def __init__(self):
self.reset()
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
self.val = val
self.sum += val * n
self.count += n
self.avg = self.sum / self.count
class FeatureDataSet(torch.utils.data.Dataset):
def __init__(self, x_train, y_train, x_coordinates, y_coordinates, z_coordinates):
self.x_train = torch.tensor(x_train, dtype=torch.long)
self.y_train = torch.tensor(y_train)
self.x_coordinates = torch.tensor(x_coordinates, dtype=torch.float32)
self.y_coordinates = torch.tensor(y_coordinates, dtype=torch.float32)
self.z_coordinates = torch.tensor(z_coordinates, dtype=torch.float32)
def __len__(self):
return len(self.y_train)
def __getitem__(self, idx):
return self.x_train[idx], self.y_train[idx], self.x_coordinates[idx], self.y_coordinates[idx], self.z_coordinates[idx]
class BaselineModel(nn.Module):
def __init__(self):
super(BaselineModel, self).__init__()
vocab_size = 40
self.hidden_size = 100
self.embedding_table_size = self.hidden_size
self.encode_x = nn.Linear(1, self.hidden_size)
self.encode_y = nn.Linear(1, self.hidden_size)
self.encode_z = nn.Linear(1, self.hidden_size)
self._embeddings = nn.Embedding(vocab_size, self.embedding_table_size)
nn.init.uniform_(self._embeddings.weight, -1.0, 1.0)
self.num_layers = 1
self.rnn = nn.LSTM(self.embedding_table_size, self.hidden_size, batch_first=True)
self.fc_after_text_lstm = nn.Linear(self.hidden_size, 100)
self.fc = nn.Linear(100, 256)
self.fc_final = nn.Linear(256, 2)
self.relu_activation = nn.ReLU()
self.softmax = nn.Softmax(dim=1)
self.hidden = self.init_hidden(batch_size)
def init_hidden(self, batch_size, device='cuda:0'):
# for LSTM, we need # of layers
h_0 = torch.zeros(1, batch_size, self.hidden_size).to(device)
c_0 = torch.zeros(1, batch_size, self.hidden_size).to(device)
return h_0, c_0
def forward(self, input_text, x_coordinate=None, y_coordinate=None, z_coordinate=None):
x_embed = self.relu_activation(self.encode_x(x_coordinate.cuda().to(torch.float32)).cuda())
y_embed = self.relu_activation(self.encode_y(y_coordinate.cuda().to(torch.float32))).cuda()
z_embed = self.relu_activation(self.encode_z(z_coordinate.cuda().to(torch.float32))).cuda()
embeds = self._embeddings(input_text)
embedding, hidden = self.rnn(embeds, self.hidden)
text_fc = self.relu_activation(self.fc_after_text_lstm(embedding[:, -1]))
representations_so_far_added = torch.sum(torch.stack([text_fc, x_embed, y_embed, z_embed]), dim=0)
pre_final_embedding = self.relu_activation(self.fc(representations_so_far_added))
return self.fc_final(pre_final_embedding)
# model = RNN(input_dim, hidden_dim, layer_dim, output_dim, batch_size)
model = BaselineModel().cuda()
criterion = nn.CrossEntropyLoss()
opt = torch.optim.Adam(model.parameters(), lr=0.001)
print('Start model training')
import sklearn.metrics as skm
import torch.nn.functional as F
x_train = []
x_coordinates = []
y_coordinates = []
z_coordinates = []
y_train = []
for i in range(10000):
x_coordinate = round(np.random.uniform(-1, 1.00, 1)[0], 2)
y_coordinate = round(np.random.uniform(-1, 1.00, 1)[0], 2)
z_coordinate = round(np.random.uniform(-1, 1.00, 1)[0], 2)
x_coordinates.append([x_coordinate])
y_coordinates.append([y_coordinate])
z_coordinates.append([z_coordinate])
if np.random.randint(0, 2) == 0: # positive example
if x_coordinate <= 0 and z_coordinate <= 0:
x_train.append([1, 5, 6, 8])
elif x_coordinate <= 0 and z_coordinate > 0:
x_train.append([1, 5, 6, 9])
elif x_coordinate > 0 and z_coordinate <= 0:
x_train.append([1, 5, 6, 10])
elif x_coordinate > 0 and z_coordinate > 0:
x_train.append([1, 5, 6, 11])
y_train.append(1.0)
else:
if x_coordinate <= 0 and z_coordinate <= 0:
x_train.append(random.choice([[1, 5, 6, 9], [1, 5, 6, 10], [1, 5, 6, 11]]))
elif x_coordinate <= 0 and z_coordinate > 0:
x_train.append(random.choice([[1, 5, 6, 8], [1, 5, 6, 10], [1, 5, 6, 11]]))
elif x_coordinate > 0 and z_coordinate <= 0:
x_train.append(random.choice([[1, 5, 6, 8], [1, 5, 6, 9], [1, 5, 6, 11]]))
elif x_coordinate > 0 and z_coordinate > 0:
x_train.append(random.choice([[1, 5, 6, 8], [1, 5, 6, 9], [1, 5, 6, 10]]))
y_train.append(0.0)
# print a sample of data
print(x_train[:10])
print(y_train[:10])
print(x_coordinates[:10])
print(y_coordinates[:10])
print(z_coordinates[:10])
# create a dataloader
trainingDataset = FeatureDataSet(x_train=x_train, y_train=y_train, x_coordinates=x_coordinates, y_coordinates=y_coordinates, z_coordinates=z_coordinates)
train_loader = torch.utils.data.DataLoader(dataset=trainingDataset, batch_size=batch_size, shuffle=True)
# for each epoch
loss_meter = AverageMeter()
for epoch in range(1, n_epochs + 1):
acc_all = []
# each batch
loss_meter.reset()
for i, (x_batch, y_batch, x_coord_batch, y_coord_batch, z_coord_batch) in enumerate(train_loader):
x_batch = x_batch.to(device)
y_batch = y_batch.to(device)
x_coord_batch = x_coord_batch.to(device)
y_coord_batch = y_coord_batch.to(device)
z_coord_batch = z_coord_batch.to(device)
opt.zero_grad()
# pass in the text (x_batch) and coordinate (x_coord_batch)
out = model(x_batch, x_coordinate=x_coord_batch, y_coordinate=y_coord_batch, z_coordinate=z_coord_batch)
loss = criterion(out.float(), y_batch.type(torch.LongTensor).cuda())
loss.backward()
opt.step()
pred_idx = F.log_softmax(out, dim=1)
target_labels = y_batch.cpu().int()
pred_labels = torch.argmax(pred_idx, dim=-1).cpu().data.int()
curr_acc = skm.accuracy_score(target_labels, pred_labels)
acc_all.append(curr_acc)
loss_meter.update(loss.item())
print(np.mean(acc_all))
print("loss is %f" % loss_meter.val)
As for the "minimally reproducible example", I think the model RNN doesn't work is quite reasonable, as I have stated in the comments. I suppose that tensorflow can not fit as well, although I have not tried it. Your "minimally reproducible example" may be unrelated to your main problem.

Tensor format issue from converting Pytorch -> Onnx -> Tensorflow

I have an issue with Tensorflow model that is converted from Pytorch -> Onnx -> Tensorflow. The issue is the converted Tensorflow model expects the input in Pytorch format that is (batch size, number channels, height, width) but not in Tensorflow format (batch size, height, width, number channel). Therefore, I cannot use the model to process further with Vitis AI.
So I would like to ask is there is any ways to convert this Pytorch input format to Tensorflow format by using tools from Onnx, Tensorflow 1, or others?
My code is as below:
Pytorch -> Onnx
from hardnet import hardnet
import torch
import onnx
ckpt = torch.load('../hardnet.pth')
model_state_dict = ckpt['model_state_dict']
optimizer_state_dict = ckpt['optimizer_state_dict']
model = hardnet(11)
model.load_state_dict(model_state_dict)
model.eval()
dummy_input = torch.randn(1, 3, 1080, 1920)
input_names = ['input0']
output_names = ['output0']
output_file = 'hardnet.onnx'
torch.onnx.export(model, dummy_input, output_file, verbose=True,
input_names=input_names, output_names=output_names,
opset_version=11, keep_initializers_as_inputs=True)
onnx_model = onnx.load(output_file)
onnx.checker.check_model(onnx_model)
print('Passed Onnx')
Onnx -> Tensorflow 1 (using Tensorflow 1.15)
import cv2
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import onnx
from onnx_tf.backend import prepare
output_file = 'hardnet.onnx'
onnx_model = onnx.load(output_file)
output = prepare(onnx_model)
output.export_graph('hardnet.pb')
tf.compat.v1.disable_eager_execution()
def load_pb(path_to_pb: str):
"""From: https://stackoverflow.com/questions/51278213/what-is-the-use-of-a-pb-file-in-tensorflow-and-how-does-it-work
"""
with tf.gfile.GFile(path_to_pb, "rb") as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
with tf.Graph().as_default() as graph:
tf.import_graph_def(graph_def, name='')
return graph
graph = load_pb('hardnet.pb')
input = graph.get_tensor_by_name('input0:0')
output = graph.get_tensor_by_name('output0:0')
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
img = cv2.imread('train_0.jpg', cv2.IMREAD_COLOR)
img = cv2.resize(img, (1920, 1080))
img = img/255
img = img - mean
img = img/std
img = np.expand_dims(img, -1)
# To Pytorch format.
img = np.transpose(img, (3, 2, 0, 1))
img = img
with tf.Session(graph=graph) as sess:
pred = sess.run(output, {input: img})
You could wrap your Pytorch model into another one that would do the transpose you want to have in TensorFlow. See the following example:
Let's say you have the following toy NN:
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.rnn = nn.LSTM(10, 20, 2)
def forward(self, x):
h0 = torch.zeros(2, 3, 20)
c0 = torch.zeros(2, 3, 20)
return self.rnn(x, (h0, c0))
the exemplary pytorch/tensorflow input shape would be :
>> pytorch_input = torch.randn(5, 3, 10)
>> tf_input = torch.transpose(pytorch_input, 1, 2)
>> print("PyTorch input shape: ", pytorch_input.shape)
>> print("TensorFlow input shape: ", tf_input.shape)
PyTorch input shape: torch.Size([5, 3, 10])
TensorFlow input shape: torch.Size([5, 10, 3])
Now, the wrapper which will first transpose input and then pass transposed input to some model:
class NetTensorFlowWrapper(nn.Module):
def __init__(self, main_module: nn.Module):
super(NetTensorFlowWrapper, self).__init__()
self.main_module = main_module
def forward(self, x):
x = torch.transpose(x, 1, 2)
return self.main_module(x)
Then, this is possible:
net = Net()
net_wrapper = NetTensorFlowWrapper(net)
net(pytorch_input)
net_wrapper(tf_input)
and then, when you finally save your models like you did previously via torch.onnx.export and read their graph via onnx package (not torch.onnx) you will have...
for Net- input 5x3x10 and no transpose layer
graph torch-jit-export (
%input0[FLOAT, 5x3x10]
{
%76 = Shape(%input0)
%77 = Constant[value = <Scalar Tensor []>]()
for NetTensorFlowWrapper- input 5x10x3 and transpose layer
graph torch-jit-export (
%input0[FLOAT, 5x10x3]
{
%9 = Transpose[perm = [0, 2, 1]](%input0)
%77 = Shape(%9)
%78 = Constant[value = <Scalar Tensor []>]()
...

I followed the tensorflow image segmentation tutorial, but the predicted mask is blank

I'd like to try image segmentation with my grayscale tif images (the shape of original images are (512,512) and the value of each pixel is between 0-2 or NaN which is in float32 type and the mask images have 0, 1, or NaN also in float32 type). I followed Google Colab and tensorflow tutorial to create the following code:
from glob import glob
from PIL import Image
from tensorflow import keras
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.python.keras import layers
from tensorflow.python.keras import losses
from tensorflow.python.keras import models
from tensorflow.python.keras import backend as K
#get the path of my data
img = sorted(glob('train_sub_5/*.tif'))
mask = sorted(glob('train_mask_sub_5/*.tif'))
#split into train and test data
img, img_val, mask, mask_val = train_test_split(img, mask, test_size=0.2, random_state=42)
#load image as array and append to a list
train_image = []
for m in img:
img= Image.open(m)
img_arr = np.array(img)
stacked_img = np.stack((img_arr,)*1, axis=-1)
train_image.append(stacked_img)
train_mask = []
for n in mask:
mask= Image.open(n)
mask_arr= np.array(mask)
stacked_mask = np.stack((mask_arr,)*1, axis=-1)
train_mask.append(stacked_mask)
test_img = []
for o in img_val:
img= Image.open(o)
img_arr = np.array(img)
stacked_img = np.stack((img_arr,)*1, axis=-1)
test_img.append(stacked_img)
test_mask = []
for p in mask_val:
mask= Image.open(p)
mask_arr = np.array(mask)
stacked_mask = np.stack((mask_arr,)*1, axis=-1)
test_mask.append(stacked_mask)
#create TensorSliceDataset
for i, j in zip(train_image, train_mask):
train= tf.data.Dataset.from_tensor_slices(([i], [j]))
for k, l in zip(test_img, test_mask):
test= tf.data.Dataset.from_tensor_slices(([k], [l]))
#for visualization
def display(display_list):
plt.figure(figsize=(15, 15))
title = ['Input Image', 'True Mask', 'Predicted Mask']
for i in range(len(display_list)):
plt.subplot(1, len(display_list), i+1)
plt.title(title[i])
plt.imshow(display_list[i])
plt.axis('off')
plt.show()
for img, mask in train.take(1):
sample_image = img.numpy()[:,:,0]
sample_mask = mask.numpy()[:,:,0]
display([sample_image, sample_mask])
The output of the visualization looks normal like below:
out put of the visualization
#build the model
train_length = len(train_image)
img_shape = (512,512,1)
batch_size = 8
buffer_size = 5
epochs = 5
train_dataset = train.cache().shuffle(train_length).batch(batch_size).repeat()
train_dataset = train_dataset.prefetch(buffer_size)
test_dataset = test.batch(batch_size).repeat()
def conv_block(input_tensor, num_filters):
encoder = layers.Conv2D(num_filters, (3, 3), padding='same')(input_tensor)
encoder = layers.BatchNormalization()(encoder)
encoder = layers.Activation('relu')(encoder)
encoder = layers.Conv2D(num_filters, (3, 3), padding='same')(encoder)
encoder = layers.BatchNormalization()(encoder)
encoder = layers.Activation('relu')(encoder)
return encoder
def encoder_block(input_tensor, num_filters):
encoder = conv_block(input_tensor, num_filters)
encoder_pool = layers.MaxPooling2D((2, 2), strides=(2, 2))(encoder)
return encoder_pool, encoder
def decoder_block(input_tensor, concat_tensor, num_filters):
decoder = layers.Conv2DTranspose(num_filters, (2, 2), strides=(2, 2), padding='same')(input_tensor)
decoder = layers.concatenate([concat_tensor, decoder], axis=-1)
decoder = layers.BatchNormalization()(decoder)
decoder = layers.Activation('relu')(decoder)
decoder = layers.Conv2D(num_filters, (3, 3), padding='same')(decoder)
decoder = layers.BatchNormalization()(decoder)
decoder = layers.Activation('relu')(decoder)
decoder = layers.Conv2D(num_filters, (3, 3), padding='same')(decoder)
decoder = layers.BatchNormalization()(decoder)
decoder = layers.Activation('relu')(decoder)
return decoder
inputs = layers.Input(shape=img_shape)
# 256
encoder0_pool, encoder0 = encoder_block(inputs, 32)
# 128
encoder1_pool, encoder1 = encoder_block(encoder0_pool, 64)
# 64
encoder2_pool, encoder2 = encoder_block(encoder1_pool, 128)
# 32
encoder3_pool, encoder3 = encoder_block(encoder2_pool, 256)
# 16
encoder4_pool, encoder4 = encoder_block(encoder3_pool, 512)
# 8
center = conv_block(encoder4_pool, 1024)
# center
decoder4 = decoder_block(center, encoder4, 512)
# 16
decoder3 = decoder_block(decoder4, encoder3, 256)
# 32
decoder2 = decoder_block(decoder3, encoder2, 128)
# 64
decoder1 = decoder_block(decoder2, encoder1, 64)
# 128
decoder0 = decoder_block(decoder1, encoder0, 32)
# 256
outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(decoder0)
model = models.Model(inputs=[inputs], outputs=[outputs])
def dice_coeff(y_true, y_pred):
smooth = 1.
# Flatten
y_true_f = tf.reshape(y_true, [-1])
y_pred_f = tf.reshape(y_pred, [-1])
intersection = tf.reduce_sum(y_true_f * y_pred_f)
score = (2. * intersection + smooth) / (tf.reduce_sum(y_true_f) + tf.reduce_sum(y_pred_f) + smooth)
return score
def dice_loss(y_true, y_pred):
loss = 1 - dice_coeff(y_true, y_pred)
return loss
def bce_dice_loss(y_true, y_pred):
loss = losses.binary_crossentropy(y_true, y_pred) + dice_loss(y_true, y_pred)
return loss
model.compile(optimizer='adam', loss=bce_dice_loss, metrics=[dice_loss])
model.summary()
#save model
save_model_path = 'tmp/weights.hdf5'
cp = tf.keras.callbacks.ModelCheckpoint(filepath=save_model_path, monitor='val_dice_loss', mode='max', save_best_only=True)
#start training
history = model.fit(train_dataset,
steps_per_epoch=int(np.ceil(train_length / float(batch_size))),
epochs=epochs,
validation_data=test_dataset,
validation_steps=int(np.ceil(len(test_img) / float(batch_size))),
callbacks=[cp])
#training process visualization
dice = history.history['dice_loss']
val_dice = history.history['val_dice_loss']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(epochs)
plt.figure(figsize=(16, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, dice, label='Training Dice Loss')
plt.plot(epochs_range, val_dice, label='Validation Dice Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Dice Loss')
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()
The output of the training process visualization looks like below:
The output of the training process visualization
The model seems functioning.
#make prediction
def show_predictions(dataset=None, num=1):
for image, mask in dataset.take(num):
pred_mask = model.predict(image)
display([image[0,:,:,0], mask[0,:,:,0], create_mask(pred_mask)])
def create_mask(pred_mask):
pred_mask = tf.argmax(pred_mask, axis=-1)
pred_mask = pred_mask[..., tf.newaxis]
return pred_mask[0,:,:,0]
show_predictions(test_dataset, 3)
The output of the prediction is below:
The output of predictions
I tried to inspect the variables test and test_dataset using:
for img, mask in test:
print(img,mask)
But I only got one image array and one mask array. Does it mean that there's only one image array and one mask array in the dataset? What's wrong with my code creating train and test TensorSliceDataset?
The Second question is why I got the predicted mask blank? Is it because some of my patches have nan? As you can see in output, the white part of the input image and the true mask, the sea is represented by NaN. If this is the problem, how do I set the value for NaN if I hope the model can ignore sea?
Thank you for your help.
def display(display_list):
fig = plt.figure(figsize=(15, 15))
title = ['Input Image', 'True Mask', 'Predicted Mask']
for i in range(len(display_list)):
plt.subplot(1, len(display_list), i + 1)
plt.title(title[i])
plt.imshow(tf.keras.preprocessing.image.array_to_img
(display_list[i]))
plt.axis('off')
plt.show()
def show_predictions(dataset=None, num=1):
for image, mask in dataset.take(num):
pred_mask = model.predict(image)
pred_mask *= 255.0
print(pred_mask.min())
print(pred_mask.max())
print(np.unique(pred_mask, return_counts=True))
display([image[0], mask[0], pred_mask[0]])
show_predictions(test_dataset, 3)

Compound Poisson Keras custom loss function

I am trying to implement a custom loss function using Tensorflow as the negative loglikelihood of this expression (which is a compound Poisson-Gamma):
The first term (represented by the Dirac delta) refers to the case when z == 0, while the sum (which needs to be truncated at some point in the implementation as it goes to infinity) represents the product of the probability from a Gamma and a Poisson distribution.
This is the tentative implementation in Tensorflow:
import tensorflow as tf
import tensorflow_probability as tfp
from functools import partial
tf.enable_eager_execution()
import numpy as np
def pois_gamma_compound_loss(y_true, y_pred):
lambda_, alpha, beta = y_pred[:, 0], y_pred[:, 1], y_pred[:, 2]
poisson_distr = tfp.distributions.Poisson(rate=lambda_)
ijk_0 = (1.0, tf.zeros_like(y_true))
c = lambda i, p: i < 4
b = lambda i, p: (tf.add(i, 1),
p + tf.math.multiply(x = poisson_distr.prob(tf.zeros_like(y_true) + i),
y = tfp.distributions.Gamma(concentration=tf.math.multiply(x = alpha,
y = tf.zeros_like(y_true) + i),
rate=beta).prob(y_tru)))
ijk_final = tf.while_loop(c, b, ijk_0)
batch_lik = tf.add(ijk_final[1], tf.math.exp(tf.multiply(lambda_, -1.0)))
return -tf.reduce_mean(batch_lik)
inputs = Input(shape=(39,))
x = Dense(4, activation='relu', kernel_initializer='random_uniform')(inputs)
x = Dense(4, activation='relu', kernel_initializer='random_uniform')(inputs)
x = Dense(6, activation='relu', kernel_initializer='random_uniform')(inputs)
lambda_ = Dense(1, activation="softmax", name="lambda", kernel_initializer='random_uniform')(x)
alpha = Dense(1, activation="softmax", name="alpha", kernel_initializer='random_uniform')(x)
beta = Dense(1, activation="softmax", name="beta", kernel_initializer='random_uniform')(x)
output_params = Concatenate(name="pvec", axis=1)([lambda_, alpha, beta])
model = Model(inputs, output_params)
model.compile(loss=pois_gamma_compound_loss, optimizer='adam')
model.fit(X_train, y_train, epochs=60, batch_size=20)

how to calculate the derivate value of Multi-input models in keras by with tensorflow backend

My question is: I want to calculate the derivation of "time input" and "dense_input". Before asking question, I search the soluatoin of calculaing jacobian matrix by keras function.
After running it, But I got this error:
File "\keras\backend\tensorflow_backend.py", line 2614, in _call
dtype=tensor.dtype.base_dtype.name))
AttributeError: 'list' object has no attribute 'dtype'
Here is my simple version:
from keras.models import *
from keras.layers import *
import keras.backend as K
import pandas as pd
from keras import optimizers
def get_model(timestamp, features):
time_input = Input(shape=(timestamp, features,), name='time_input')
lstm_out = LSTM(4)(time_input)
dense_hidden_units = 2
dense_input_layer = Input(shape=(dense_length,), name='dense_input_layer')
final_input_layer = concatenate([lstm_out, dense_input_layer])
# Disable biases in the hidden layer
dense_1 = Dense(units=dense_hidden_units, use_bias=False, activation='sigmoid')(final_input_layer)
# Disable bias in output layer
output_layer = Dense(units=1, use_bias=False, name='final_output')(dense_1)
model = Model(
inputs=[time_input, dense_input_layer],
outputs=output_layer
)
print(model.summary())
return model
if __name__ == '__main__':
timestamp = 3
features = 1
dense_length = 3
temp_data = pd.DataFrame([
[1, 2, 3, 2, 3, 4],
])
time_data = temp_data.values.reshape(-1, timestamp, features)
dense_data = temp_data.values.reshape(-1, dense_length)
target_data = np.array([1, 2])
print(time_data.shape)
print(dense_data.shape)
print(target_data.shape)
model = get_model(
timestamp, features
)
Ada = optimizers.Adagrad(lr=0.09, epsilon=1e-04)
model.compile(loss='mse', optimizer=Ada, metrics=['mse'])
model.fit(
{
'time_input': time_data,
'dense_input_layer': dense_data,
},
{
'final_output': target_data
},
epochs=1, batch_size=1
)
time_input = model.get_layer('time_input').input
GPP_input_layer = model.get_layer('dense_input_layer').input
J = K.gradients(model.output, [time_input, GPP_input_layer])
jacobianTime = K.function([[time_input, GPP_input_layer], K.learning_phase()], J)
deriRes = jacobianTime([time_data, dense_data]) # this line throw exception
print(deriRes[0])
Thanks for help!
You have an extra set of brackets.
jacobianTime = K.function([[time_input, GPP_input_layer], K.learning_phase()], J)
to
jacobianTime = K.function([time_input, GPP_input_layer, K.learning_phase()], J)
I was able to run your code like this at least.