Implementing contrastive loss function in mxnet

I want to train a Siamese net using depth images obtained from a Kinect. I want to use a contrastive loss function to train this network, but I can't find a contrastive loss function in MXNet. My implementation is as follows:
def LossFunc(distance, label, margin):
    distance = distance.reshape(label.shape)
    dis_positive = distance * label
    dis_negative = margin - distance
    zeros = nd.zeros(label.shape, ctx=ctx)
    dis_negative = nd.concat(dis_negative, zeros, dim=1)
    dis_negative = nd.max(dis_negative, axis=1).reshape(label.shape)
    dis_negative = (1 - label) * dis_negative
    return 0.5 * dis_positive**2 + 0.5 * dis_negative**2
Is it right?
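For reference, the concat/max trick above can be written more compactly with nd.maximum. This is just a sketch of the same formula, not MXNet's built-in loss; it assumes distance and label are NDArrays of matching shape and keeps the question's convention of label = 1 for a same-class pair:

def contrastive_loss(distance, label, margin):
    # label == 1 for pairs from the same class, 0 otherwise (as in the snippet above)
    distance = distance.reshape(label.shape)
    dis_positive = distance * label
    dis_negative = (1 - label) * nd.maximum(margin - distance, 0)
    return 0.5 * dis_positive**2 + 0.5 * dis_negative**2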

Here is the implementation of the Contrastive loss using Gluon API:
class ContrastiveLoss(Loss):
    def __init__(self, margin=2.0, weight=None, batch_axis=0, **kwargs):
        super(ContrastiveLoss, self).__init__(weight, batch_axis, **kwargs)
        self.margin = margin

    def hybrid_forward(self, F, output1, output2, label):
        euclidean_distance = F.sqrt(F.square(output1 - output2))
        loss_contrastive = F.mean((1 - label) * F.square(euclidean_distance) +
                                  label * F.square(F.clip(self.margin - euclidean_distance, 0.0, 10)))
        return loss_contrastive
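A quick way to sanity-check this loss on random embeddings (a sketch; it assumes import mxnet as mx plus the Loss import from the full example below, batches of shape (batch, embedding_dim), and the label convention used there, 1 meaning a dissimilar pair):

emb1 = mx.nd.random.normal(shape=(4, 5))
emb2 = mx.nd.random.normal(shape=(4, 5))
label = mx.nd.array([[0], [1], [0], [1]])   # 1 = different class, 0 = same class
loss_fn = ContrastiveLoss(margin=2.0)
print(loss_fn(emb1, emb2, label))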
I have implemented it based on a PyTorch example of how to use a Siamese net, taken from here.
There are quite a few differences between PyTorch and MXNet, so if you want to try this one out, here is the full runnable example. You would need to download the AT&T faces data, though, and convert the images to JPEG, as MXNet doesn't support loading .pgm images out of the box.
import matplotlib.pyplot as plt
import numpy as np
import random
from PIL import Image
import PIL.ImageOps
import mxnet as mx
from mxnet import autograd
from mxnet.base import numeric_types
from mxnet.gluon import nn, HybridBlock, Trainer
from mxnet.gluon.data import DataLoader
from mxnet.gluon.data.vision.datasets import ImageFolderDataset
from mxnet.gluon.loss import Loss


def imshow(img, text=None, should_save=False):
    npimg = img.numpy()
    plt.axis("off")
    if text:
        plt.text(75, 8, text, style='italic', fontweight='bold',
                 bbox={'facecolor': 'white', 'alpha': 0.8, 'pad': 10})
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


def show_plot(iteration, loss):
    plt.plot(iteration, loss)
    plt.show()


class Config:
    training_dir = "./faces/training/"
    testing_dir = "./faces/testing/"
    train_batch_size = 5
    train_number_epochs = 100


class SiameseNetworkDataset(ImageFolderDataset):
    def __init__(self, root, transform=None):
        super().__init__(root, flag=0, transform=transform)
        self.root = root
        self.transform = transform

    def __getitem__(self, index):
        items_with_index = list(enumerate(self.items))
        img0_index, img0_tuple = random.choice(items_with_index)
        # we need to make sure approx 50% of images are in the same class
        should_get_same_class = random.randint(0, 1)
        if should_get_same_class:
            while True:
                # keep looping till the same class image is found
                img1_index, img1_tuple = random.choice(items_with_index)
                if img0_tuple[1] == img1_tuple[1]:
                    break
        else:
            img1_index, img1_tuple = random.choice(items_with_index)
        img0 = super().__getitem__(img0_index)
        img1 = super().__getitem__(img1_index)
        return img0[0].transpose(), img1[0].transpose(), mx.nd.array([int(img1_tuple[1] != img0_tuple[1])])

    def __len__(self):
        return super().__len__()


class ReflectionPad2D(HybridBlock):
    """Pads the input tensor using the reflection of the input boundary.

    Parameters
    ----------
    padding: int
        An integer padding size

    Shape:
        - Input: :math:`(N, C, H_{in}, W_{in})`
        - Output: :math:`(N, C, H_{out}, W_{out})` where
          :math:`H_{out} = H_{in} + 2 * padding` and
          :math:`W_{out} = W_{in} + 2 * padding`
    """
    def __init__(self, padding=0, **kwargs):
        super(ReflectionPad2D, self).__init__(**kwargs)
        if isinstance(padding, numeric_types):
            padding = (0, 0, 0, 0, padding, padding, padding, padding)
        assert len(padding) == 8
        self._padding = padding

    def hybrid_forward(self, F, x, *args, **kwargs):
        return F.pad(x, mode='reflect', pad_width=self._padding)


class SiameseNetwork(HybridBlock):
    def __init__(self):
        super(SiameseNetwork, self).__init__()
        self.cnn1 = nn.HybridSequential()
        with self.cnn1.name_scope():
            self.cnn1.add(ReflectionPad2D(padding=1))
            self.cnn1.add(nn.Conv2D(in_channels=1, channels=4, kernel_size=3))
            self.cnn1.add(nn.Activation('relu'))
            self.cnn1.add(nn.BatchNorm())
            self.cnn1.add(ReflectionPad2D(padding=1))
            self.cnn1.add(nn.Conv2D(in_channels=4, channels=8, kernel_size=3))
            self.cnn1.add(nn.Activation('relu'))
            self.cnn1.add(nn.BatchNorm())
            self.cnn1.add(ReflectionPad2D(padding=1))
            self.cnn1.add(nn.Conv2D(in_channels=8, channels=8, kernel_size=3))
            self.cnn1.add(nn.Activation('relu'))
            self.cnn1.add(nn.BatchNorm())
        self.fc1 = nn.HybridSequential()
        with self.fc1.name_scope():
            self.fc1.add(nn.Dense(500))
            self.fc1.add(nn.Activation('relu'))
            self.fc1.add(nn.Dense(500))
            self.fc1.add(nn.Activation('relu'))
            self.fc1.add(nn.Dense(5))

    def hybrid_forward(self, F, input1, input2):
        output1 = self._forward_once(input1)
        output2 = self._forward_once(input2)
        return output1, output2

    def _forward_once(self, x):
        output = self.cnn1(x)
        # output = output.reshape((output.shape[0],))
        output = self.fc1(output)
        return output


class ContrastiveLoss(Loss):
    def __init__(self, margin=2.0, weight=None, batch_axis=0, **kwargs):
        super(ContrastiveLoss, self).__init__(weight, batch_axis, **kwargs)
        self.margin = margin

    def hybrid_forward(self, F, output1, output2, label):
        euclidean_distance = F.sqrt(F.square(output1 - output2))
        loss_contrastive = F.mean((1 - label) * F.square(euclidean_distance) +
                                  label * F.square(F.clip(self.margin - euclidean_distance, 0.0, 10)))
        return loss_contrastive


def aug_transform(data, label):
    augs = mx.image.CreateAugmenter(data_shape=(1, 100, 100))
    for aug in augs:
        data = aug(data)
    return data, label


def run_training():
    siamese_dataset = SiameseNetworkDataset(root=Config.training_dir, transform=aug_transform)
    train_dataloader = DataLoader(siamese_dataset, shuffle=True, num_workers=1, batch_size=Config.train_batch_size)
    counter = []
    loss_history = []
    iteration_number = 0

    net = SiameseNetwork()
    net.initialize(init=mx.init.Xavier())
    trainer = Trainer(net.collect_params(), 'adam', {'learning_rate': 0.0005})
    loss = ContrastiveLoss(margin=2.0)

    for epoch in range(0, Config.train_number_epochs):
        for i, data in enumerate(train_dataloader, 0):
            img0, img1, label = data
            with autograd.record():
                output1, output2 = net(img0, img1)
                loss_contrastive = loss(output1, output2, label)
            loss_contrastive.backward()
            trainer.step(Config.train_batch_size)
            if i % 10 == 0:
                print("Epoch number {}\n Current loss {}\n".format(epoch, loss_contrastive))
                iteration_number += 10
                counter.append(iteration_number)
                loss_history.append(loss_contrastive)
    # show_plot(counter, loss_history)
    return net


def run_predict(net):
    folder_dataset_test = SiameseNetworkDataset(root=Config.testing_dir, transform=aug_transform)
    test_dataloader = DataLoader(folder_dataset_test, shuffle=True, num_workers=1, batch_size=Config.train_batch_size)

    dataiter = iter(test_dataloader)
    x0, _, _ = next(dataiter)
    _, x1, label2 = next(dataiter)

    output1, output2 = net(x0, x1)
    euclidean_distance = mx.ndarray.sqrt(mx.ndarray.square(output1 - output2))
    print('x0 vs x1 dissimilarity is {}'.format(euclidean_distance[0][0]))


if __name__ == '__main__':
    net = run_training()
    run_predict(net)
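As noted above, the AT&T faces ship as .pgm files. A minimal conversion sketch using Pillow (the ./faces directory layout and the output extension are assumptions; adjust to wherever you extracted the archive):

import os
from PIL import Image

def convert_pgm_to_jpeg(root="./faces"):
    # Walk the extracted archive and write a .jpeg next to every .pgm file.
    for dirpath, _, filenames in os.walk(root):
        for fname in filenames:
            if fname.lower().endswith(".pgm"):
                src = os.path.join(dirpath, fname)
                Image.open(src).save(os.path.splitext(src)[0] + ".jpeg")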

Related

RuntimeError: The size of tensor a (49) must match the size of tensor b (64) at non-singleton dimension 1

I have been working with Swin Transformer attention maps. Below is my code implementation:
from PIL import Image
import numpy
import sys
import torch
from torchvision import transforms
import numpy as np
import cv2


def rollout(attentions, discard_ratio, head_fusion):
    result = torch.eye(attentions[0].size(-1))
    with torch.no_grad():
        for attention in attentions:
            # print(attentions)
            if head_fusion == "mean":
                attention_heads_fused = attention.mean(axis=1)
            elif head_fusion == "max":
                attention_heads_fused = attention.max(axis=1)[0]
            elif head_fusion == "min":
                attention_heads_fused = attention.min(axis=1)[0]
            else:
                raise ValueError("Attention head fusion type not supported")

            # Drop the lowest attentions, but
            # don't drop the class token
            flat = attention_heads_fused.view(attention_heads_fused.size(0), -1)
            # print(flat)
            _, indices = flat.topk(int(flat.size(-1) * discard_ratio), -1, False)
            # print("_ : ", _, " indices : ", indices)
            indices = indices[indices != 0]
            flat[0, indices] = 0

            I = torch.eye(attention_heads_fused.size(-1))
            # print("I : ", I)
            a = (attention_heads_fused + 1.0 * I) / 2
            # print("a : ", a)
            # print(a.size())
            print(a.sum(dim=-1))
            a = a / a.sum(dim=-1)
            result = torch.matmul(a, result)
            # print("result : ", result)

    # Look at the total attention between the class token,
    # and the image patches
    mask = result[0, 0, 1:]
    # In case of 224x224 image, this brings us from 196 to 14
    width = int(mask.size(-1) ** 0.5)
    mask = mask.reshape(width, width).numpy()
    mask = mask / np.max(mask)
    return mask


class VITAttentionRollout:
    def __init__(self, model, attention_layer_name='dropout', head_fusion="mean",
                 discard_ratio=0.9):
        self.model = model
        self.head_fusion = head_fusion
        self.discard_ratio = discard_ratio
        # print(self.model.named_modules())
        for name, module in self.model.named_modules():
            # print("Name : ", name, " Module : ", module)
            if attention_layer_name in name:
                module.register_forward_hook(self.get_attention)
                # print(self.attentions)
        self.attentions = []

    def get_attention(self, module, input, output):
        self.attentions.append(output.cpu())

    def __call__(self, input_tensor):
        self.attentions = []
        with torch.no_grad():
            output = self.model(**input_tensor)
            # print(output)
        return rollout(self.attentions, self.discard_ratio, self.head_fusion)
This is the main program
import sys
import torch
from PIL import Image
from torchvision import transforms
import numpy as np
import cv2
from google.colab.patches import cv2_imshow
# from vit_rollout import VITAttentionRollout
from vit_grad_rollout import VITAttentionGradRollout
def show_mask_on_image(img, mask):
    img = np.float32(img) / 255
    heatmap = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET)
    heatmap = np.float32(heatmap) / 255
    cam = heatmap + np.float32(img)
    cam = cam / np.max(cam)
    return np.uint8(255 * cam)


if __name__ == '__main__':
    # model and feature_extractor are assumed to be created earlier in the script
    # (e.g., a Swin checkpoint loaded via the Hugging Face transformers API)
    model.eval()
    image_path = '/content/both.jpg'
    category_index = None
    head_fusion = 'max'
    discard_ratio = 0.9
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])
    img = Image.open(image_path)
    img = img.resize((224, 224))
    input_tensor = feature_extractor(img, return_tensors="pt")
    # print(input_tensor)
    if category_index is None:
        print("Doing Attention Rollout")
        attention_rollout = VITAttentionRollout(model, head_fusion=head_fusion,
                                                discard_ratio=discard_ratio)
        mask = attention_rollout(input_tensor)
        name = "attention_rollout_{:.3f}_{}.png".format(discard_ratio, head_fusion)
    else:
        print("Doing Gradient Attention Rollout")
        grad_rollout = VITAttentionGradRollout(model, discard_ratio=discard_ratio)
        mask = grad_rollout(input_tensor, category_index)
        name = "grad_rollout_{}_{:.3f}_{}.png".format(category_index,
                                                      discard_ratio, head_fusion)

    np_img = np.array(img)[:, :, ::-1]
    mask = cv2.resize(mask, (np_img.shape[1], np_img.shape[0]))
    mask = show_mask_on_image(np_img, mask)
    cv2_imshow(np_img)
    cv2_imshow(mask)
    cv2.imwrite("input.jpg", np_img)
    cv2.imwrite(name, mask)
    cv2.waitKey(-1)
I am referring to the GitHub project https://github.com/jacobgil/vit-explain,
but I am getting the error RuntimeError: The size of tensor a (49) must match the size of tensor b (64) at non-singleton dimension 1.
I researched some GitHub projects, but there is very little information on Swin Transformers. Is there any way I can make an attention map for Swin Transformer models?
Please help with it.
Thanks in advance.
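A likely cause (an assumption, since the model-loading code is not shown): Swin computes attention inside shifted local windows, so the hooked tensors have shapes like (num_windows * batch, num_heads, window_len, window_len), with window_len = 49 for a 7x7 window, and the token count changes between stages. The ViT-style rollout above assumes same-sized square attention matrices with a class token, which is where the 49 vs 64 mismatch comes from when they are multiplied. A short diagnostic sketch that prints what the hooks actually capture before attempting any rollout (the "attn_drop" layer name is an assumption and depends on the Swin implementation in use):

import torch

# Register hooks on the attention-dropout layers and record the shape of
# every tensor they emit during one forward pass of the model above.
captured = []
hooks = []
for name, module in model.named_modules():
    if "attn_drop" in name:
        hooks.append(module.register_forward_hook(
            lambda m, inp, out, n=name: captured.append((n, tuple(out.shape)))))
with torch.no_grad():
    model(**input_tensor)
for layer_name, shape in captured:
    print(layer_name, shape)   # e.g. (64, 3, 49, 49): 64 windows, 3 heads, 7x7 tokens per window
for h in hooks:
    h.remove()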

Unable to detect multiple faces at a time

For some reason, I'm not able to detect multiple faces at a time; it only detects one face at a time. How do I resolve this issue? I've added the code below. I've used Google's FaceNet for real-time face recognition.
In the video output it draws a bounding box on only one face at a time, but in the console output it counts that two or more faces are present.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from scipy import misc
import cv2
import matplotlib.pyplot as plt
import numpy as np
import argparse
import facenet
import detect_face
import os
from os.path import join as pjoin
import sys
import time
import copy
import math
import pickle
from sklearn.svm import SVC
from sklearn.externals import joblib

# added
# import reload
# reload(sys)
# sys.setdefaultencoding('utf8')

print('Creating networks and loading parameters')
with tf.Graph().as_default():
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                            log_device_placement=False))
    with sess.as_default():
        pnet, rnet, onet = detect_face.create_mtcnn(sess, './')  # face detection

        minsize = 20  # minimum size of face; minsize, threshold, factor used for detection
        threshold = [0.6, 0.7, 0.7]  # three steps' threshold
        factor = 0.709  # scale factor
        margin = 44
        frame_interval = 3
        batch_size = 1000
        image_size = 182
        input_image_size = 160

        items = os.listdir("/Aryabhatta Robotics Internship/facenet-master/Real_time_face/ids/aligned")
        # HumanNames = []
        # for names in items:
        #     HumanNames.append(names)
        # print(HumanNames)
        # HumanNames = ['Alok','Siddhant','tesra','s01','s02','s03','s04','s05','s06','s07','s08','s09','s10','s11','s12','s13','s14','s15','s16','s17','s18','s19','s20']  # train human name, known face names

        print('Loading feature extraction model')
        modeldir = '/Aryabhatta Robotics Internship/facenet-master/Real_time_face/models/20180402-114759/20180402-114759.pb'  # feature extraction model
        facenet.load_model(modeldir)

        images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
        embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
        phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
        embedding_size = embeddings.get_shape()[1]

        classifier_filename = '/Aryabhatta Robotics Internship/facenet-master/Real_time_face/models/my_classifier/my_classifier.pkl'  # our own classifier
        classifier_filename_exp = os.path.expanduser(classifier_filename)
        with open(classifier_filename_exp, 'rb') as infile:
            (model, class_names) = pickle.load(infile)  # , encoding='latin1')
            print('load classifier file-> %s' % classifier_filename_exp)

        video_capture = cv2.VideoCapture(0)
        c = 0

        # # video writer
        # fourcc = cv2.VideoWriter_fourcc(*'DIVX')
        # out = cv2.VideoWriter('3F_0726.avi', fourcc, fps=30, frameSize=(640,480))

        print('Start Recognition!')
        prevTime = 0
        while True:  # infinite loop
            ret, frame = video_capture.read()  # video capture from webcam
            frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)  # resize frame (optional)
            curTime = time.time()  # calc fps
            timeF = frame_interval

            if (c % timeF == 0):
                find_results = []
                if frame.ndim == 2:
                    frame = facenet.to_rgb(frame)
                frame = frame[:, :, 0:3]
                bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor)
                nrof_faces = bounding_boxes.shape[0]
                print('Detected_FaceNum: %d' % nrof_faces)

                if nrof_faces > 0:
                    det = bounding_boxes[:, 0:4]
                    img_size = np.asarray(frame.shape)[0:2]
                    cropped = []
                    scaled = []
                    scaled_reshape = []
                    bb = np.zeros((nrof_faces, 4), dtype=np.int32)

                    for i in range(nrof_faces):
                        print("faceno:" + str(i))
                        emb_array = np.zeros((1, embedding_size))
                        bb[i][0] = det[i][0]
                        bb[i][1] = det[i][1]
                        bb[i][2] = det[i][2]
                        bb[i][3] = det[i][3]

                        # inner exception
                        if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
                            print('face is inner of range!')
                            continue

                        cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                        cropped[0] = facenet.flip(cropped[0], False)
                        scaled.append(misc.imresize(cropped[0], (image_size, image_size), interp='bilinear'))
                        scaled[0] = cv2.resize(scaled[0], (input_image_size, input_image_size),
                                               interpolation=cv2.INTER_CUBIC)
                        scaled[0] = facenet.prewhiten(scaled[0])
                        scaled_reshape.append(scaled[0].reshape(-1, input_image_size, input_image_size, 3))
                        feed_dict = {images_placeholder: scaled_reshape[0], phase_train_placeholder: False}
                        emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                        # print(emb_array)

                        threshold_accuracy = 155

                        predictions = model.predict_proba(emb_array)
                        # print(predictions)
                        for i in range(len(predictions[0])):
                            predictions[0][i] = np.exp(18 * predictions[0][i])
                        # print(predictions)

                        best_class_indices = np.argmax(predictions, axis=1)
                        print(best_class_indices)
                        print("next")
                        best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
                        print(best_class_probabilities)

                        for i in range(len(best_class_indices)):
                            print('%4d %s: %.3f' % (i, class_names[best_class_indices[i]], best_class_probabilities[i]))
                            cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2)  # boxing face
                            # plot result idx under box
                            text_x = bb[i][0]
                            text_y = bb[i][3] + 20
                            # print('result: ', best_class_indices[0])
                            if best_class_probabilities[i] > threshold_accuracy:
                                # result_names = HumanNames[best_class_indices[0]]
                                cv2.putText(frame, class_names[best_class_indices[i]], (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                            1, (0, 0, 255), thickness=1, lineType=2)
                            else:
                                cv2.putText(frame, 'Unknown', (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                            1, (0, 0, 255), thickness=1, lineType=2)
                            # for H_i in HumanNames:
                            #     if HumanNames[best_class_indices[0]] == H_i and best_class_probabilities[0] > threshold_accuracy:
                            #         flag = 1
                            #         result_names = HumanNames[best_class_indices[0]]
                            #         cv2.putText(frame, result_names, (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
                            #                     1, (0, 0, 255), thickness=1, lineType=2)
                            #     else:
                            #         cv2.putText(frame, 'Unknown', (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
                            #                     1, (0, 0, 255), thickness=1, lineType=2)
                else:
                    print('Unable to align')

            sec = curTime - prevTime
            prevTime = curTime
            fps = 1 / (sec)
            str1 = 'FPS: %2.3f' % fps
            text_fps_x = len(frame[0]) - 150
            text_fps_y = 20
            cv2.putText(frame, str1, (text_fps_x, text_fps_y),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 0), thickness=1, lineType=2)
            # c += 1
            cv2.imshow('Video', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        video_capture.release()
        # # video writer
        # out.release()
        cv2.destroyAllWindows()
The threshold accuracy should be between 0 and 1; make sure your threshold accuracy is > 0.60.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from scipy import misc
from skimage.transform import resize
import cv2
import numpy as np
import facenet
import detect_face
import os
import time
import pickle
import sys

img_path = 'download.jpeg'
modeldir = './model/20170511-185253.pb'
classifier_filename = './class/classifier.pkl'
npy = './npy'
train_img = "./train_img"

with tf.Graph().as_default():
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
    with sess.as_default():
        pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)

        minsize = 10  # minimum size of face
        threshold = [0.6, 0.7, 0.7]  # three steps' threshold
        factor = 1  # scale factor
        margin = 44
        frame_interval = 3
        batch_size = 1000
        image_size = 182
        input_image_size = 160

        HumanNames = os.listdir(train_img)
        HumanNames.sort()

        print('Loading feature extraction model')
        facenet.load_model(modeldir)

        images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
        embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
        phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
        embedding_size = embeddings.get_shape()[1]

        classifier_filename_exp = os.path.expanduser(classifier_filename)
        with open(classifier_filename_exp, 'rb') as infile:
            (model, class_names) = pickle.load(infile)

        # video_capture = cv2.VideoCapture("akshay_mov.mp4")
        c = 0

        print('Start Recognition!')
        prevTime = 0
        # ret, frame = video_capture.read()
        frame = cv2.imread(img_path, 0)
        frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)  # resize frame (optional)
        curTime = time.time() + 1  # calc fps
        timeF = frame_interval

        if (c % timeF == 0):
            find_results = []
            if frame.ndim == 2:
                frame = facenet.to_rgb(frame)
            frame = frame[:, :, 0:3]
            print(1)
            bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor)
            nrof_faces = bounding_boxes.shape[0]
            print('Face Detected: %d' % nrof_faces)

            if nrof_faces > 0:
                det = bounding_boxes[:, 0:4]
                img_size = np.asarray(frame.shape)[0:2]
                cropped = []
                scaled = []
                scaled_reshape = []
                bb = np.zeros((nrof_faces, 4), dtype=np.int32)

                for i in range(nrof_faces):
                    emb_array = np.zeros((1, embedding_size))
                    bb[i][0] = det[i][0]
                    bb[i][1] = det[i][1]
                    bb[i][2] = det[i][2]
                    bb[i][3] = det[i][3]

                    # inner exception
                    if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
                        print('face is too close')
                        continue

                    cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                    cropped[i] = facenet.flip(cropped[i], False)
                    scaled.append(resize(cropped[i], (image_size, image_size), anti_aliasing=True))
                    scaled[i] = cv2.resize(scaled[i], (input_image_size, input_image_size),
                                           interpolation=cv2.INTER_CUBIC)
                    scaled[i] = facenet.prewhiten(scaled[i])
                    scaled_reshape.append(scaled[i].reshape(-1, input_image_size, input_image_size, 3))
                    feed_dict = {images_placeholder: scaled_reshape[i], phase_train_placeholder: False}
                    emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)

                    predictions = model.predict_proba(emb_array)
                    print(predictions)
                    best_class_indices = np.argmax(predictions, axis=1)
                    # print(best_class_indices)
                    best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
                    print(best_class_probabilities)

                    cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2)  # boxing face
                    # plot result idx under box
                    text_x = bb[i][0]
                    text_y = bb[i][3] + 20
                    print('Result Indices: ', best_class_indices[0])
                    print(HumanNames)
                    for H_i in HumanNames:
                        # print(H_i)
                        if HumanNames[best_class_indices[0]] == H_i:
                            result_names = HumanNames[best_class_indices[0]]
            else:
                print('Unable to align')

        cv2.imshow('Image', frame)
        if cv2.waitKey(100) & 0xFF == ord('q'):
            sys.exit("Thanks")
        cv2.destroyAllWindows()
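Relative to the question's loop, the key changes here are indexing the per-face buffers with [i] instead of [0] and comparing the classifier probability against a threshold in [0, 1]. Applied to the question's drawing code, that comparison would look roughly like this (a sketch; 0.60 is the suggested value, not something tuned here):

threshold_accuracy = 0.60   # probabilities from predict_proba lie in [0, 1]
if best_class_probabilities[i] > threshold_accuracy:
    cv2.putText(frame, class_names[best_class_indices[i]], (text_x, text_y),
                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), thickness=1, lineType=2)
else:
    cv2.putText(frame, 'Unknown', (text_x, text_y),
                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), thickness=1, lineType=2)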

why does tf.estimator.DNNRegressor predict negative y value?

It is strange that the predict() function in tf.estimator.DNNRegressor predicts negative y values, even though the training dataset has no negative y values. I found this when I reduced the value of y by a factor of 1000: if y was 12000 before, I changed it to 12. The range of y is now [3, 400], but after I did this, the predict() function output some negative values. I didn't set the activation function in tf.estimator.DNNRegressor, so the default activation function is ReLU, whose range is [0, max), so why does it predict negative values? Is this a bug in tf.estimator.DNNRegressor, or is there no activation function applied to y? Thank you.
The code is:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import itertools
import pandas as pd
import tensorflow as tf
from sklearn import datasets, metrics
import csv
tf.logging.set_verbosity(tf.logging.INFO)
COLUMNS = ["col1","col2","col3","col4","col5","col6","col7","col8","col9","col10","col11","col12","col13","col14","col15","col16","col17","col18","col19","col20","col21","col22","col23","col24","col25","col26","col27","col28","col29","col30","col31","col32","col33","col34","col35","col36","col37","col38","col39","col40","col41","col42","col43","col44","col45","col46","col47","col48","col49","col50","col51","col52","col53","col54","col55","col56","col57","col58","col59","col60","col61","col62","col63","col64","col65","col66","col67","col68","col69","col70","col71","col72","col73","col74","col75","col76","col77","col78","col79","col80","col81","col82","col83","col84","col85","col86","col87","col88","col89","col90","col91","col92","col93","col94","col95","col96","col97","col98","col99","col100","col101","col102","col103","col104","col105","col106","col107","col108","col109","col110","col111","col112","col113","col114","col115","col116","col117","col118","col119","col120","col121","col122","col123","col124","col125","col126","col127","col128","col129","col130","col131","col132","col133","col134","col135","col136","col137","col138","col139","col140","col141","col142","col143","col144","col145","col146","col147","col148","col149","col150","col151","col152","col153","col154","col155","col156","col157","col158","col159","col160","col161","col162","col163","col164","col165","col166","col167","col168","col169","col170","col171","col172","col173","col174","col175","col176","col177","col178","col179","col180","col181","col182","col183","col184","col185","col186","col187","col188","col189","col190","col191","col192","col193","col194","col195","col196","col197","col198","col199","col200","col201","col202","col203","col204","col205","col206","col207","col208","col209","col210","col211","col212","col213","col214"]
FEATURES = ["col1","col2","col3","col4","col5","col6","col7","col8","col9","col10","col11","col12","col13","col14","col15","col16","col17","col18","col19","col20","col21","col22","col23","col24","col25","col26","col27","col28","col29","col30","col31","col32","col33","col34","col35","col36","col37","col38","col39","col40","col41","col42","col43","col44","col45","col46","col47","col48","col49","col50","col51","col52","col53","col54","col55","col56","col57","col58","col59","col60","col61","col62","col63","col64","col65","col66","col67","col68","col69","col70","col71","col72","col73","col74","col75","col76","col77","col78","col79","col80","col81","col82","col83","col84","col85","col86","col87","col88","col89","col90","col91","col92","col93","col94","col95","col96","col97","col98","col99","col100","col101","col102","col103","col104","col105","col106","col107","col108","col109","col110","col111","col112","col113","col114","col115","col116","col117","col118","col119","col120","col121","col122","col123","col124","col125","col126","col127","col128","col129","col130","col131","col132","col133","col134","col135","col136","col137","col138","col139","col140","col141","col142","col143","col144","col145","col146","col147","col148","col149","col150","col151","col152","col153","col154","col155","col156","col157","col158","col159","col160","col161","col162","col163","col164","col165","col166","col167","col168","col169","col170","col171","col172","col173","col174","col175","col176","col177","col178","col179","col180","col181","col182","col183","col184","col185","col186","col187","col188","col189","col190","col191","col192","col193","col194","col195","col196","col197","col198","col199","col200","col201","col202","col203","col204","col205","col206","col207","col208","col209","col211","col212","col213"]
LABEL = "col214"
def get_input_fn(data_set, num_epochs=None, shuffle=True):
    return tf.estimator.inputs.pandas_input_fn(
        x=pd.DataFrame({k: data_set[k].values for k in FEATURES}),
        y=pd.Series(data_set[LABEL].values),
        num_epochs=num_epochs,
        shuffle=shuffle)

def get_mae(y_pre, y_target):
    absError = []
    for i in range(len(y_pre)):
        absError.append(abs(y_pre[i] - y_target[i]))
    return sum(absError) / len(absError)

def get_mse(y_pre, y_target):
    squaredError = []
    for i in range(len(y_pre)):
        val = y_pre[i] - y_target[i]
        squaredError.append(val * val)
    return sum(squaredError) / len(squaredError)
training_set = pd.read_csv("train.csv", skipinitialspace=True, skiprows=1, names=COLUMNS)
test_set = pd.read_csv("test.csv", skipinitialspace=True, skiprows=1, names=COLUMNS)
predict_set = pd.read_csv("predict.csv", skipinitialspace=True, skiprows=1, names=COLUMNS)
feature_cols = [tf.feature_column.numeric_column(k) for k in FEATURES]
regressor = tf.estimator.DNNRegressor(feature_columns=feature_cols, hidden_units=[250, 200, 100, 50], model_dir="./model")
regressor.train(input_fn=get_input_fn(training_set), steps=8000)
ev = regressor.evaluate(input_fn=get_input_fn(test_set, num_epochs=1, shuffle=False))
loss_score = ev["loss"]
print("Loss: {0:f}".format(loss_score))
predict = regressor.predict(input_fn=get_input_fn(predict_set, num_epochs=1, shuffle=False))
y_predict = predict_set[LABEL].values.tolist()
print(type(y_predict))
print(y_predict)
list_predict = list(predict)
print(type(list_predict))
y_predicted = []
for i in range(len(list_predict)):
    y_predicted.append(list_predict[i]['predictions'][0])
print(y_predicted)
fileObject = open('time_prediction.txt', 'w')
for time in y_predicted:
    fileObject.write(str(time))
    fileObject.write('\n')
fileObject.close()
mae = get_mae(y_predict, y_predicted)
mse = get_mse(y_predict, y_predicted)
print("Mean Absolute Error: " + str(mae) + " Mean Squared Error: " + str(mse))
# mae = tf.metrics.mean_absolute_error(y_predict, list_predict)
# print(mae)
Here are 3 data records from the dataset:
2399.998,4,100,100,0,0,1,10,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22,0,0,2,44,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,3,3,1,0,0,0,5,0,0,0,0,2,0,0,1,4,13,4,0,11,14,15,10,8,0,0,3,1,0,0,0,0,0,0,0,0,0,0,1,364,123428,1397595,16772133,56,103,16772153,22,22,11
1919.9984,2,30,30,0,0,1,10,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,38,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,3,3,1,0,0,0,5,0,0,0,0,2,0,0,0,0,12,2,0,9,14,10,9,2,0,0,2,1,0,0,0,0,0,0,0,0,0,0,1,17525535,34347257,1397595,5590711,16698805,103,5913257,896853,1190468,25
479.9996,2,60,60,0,0,1,10,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,38,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,3,3,1,0,0,0,5,0,0,0,0,2,0,0,0,0,12,2,0,9,14,10,9,2,0,0,2,1,0,0,0,0,0,0,0,0,0,0,1,17525535,34347257,1397595,5590711,16698805,103,5913257,896853,1190468,168
The last column is y.
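For what it's worth, DNNRegressor's output head is a plain linear layer; the ReLU default applies only to the hidden layers, so nothing constrains predictions to be non-negative. If negative outputs are a problem, a common workaround is to clip them or to regress log(y) and exponentiate at prediction time. A minimal sketch of the latter, reusing the column names and helpers from the script above (the separate model_dir and fresh estimator are assumptions):

import numpy as np
import tensorflow as tf

# Train on log(y) so the linear output head predicts an unconstrained value,
# then map predictions back with exp(), which is always positive.
log_training_set = training_set.copy()
log_training_set[LABEL] = np.log(log_training_set[LABEL])
log_regressor = tf.estimator.DNNRegressor(feature_columns=feature_cols,
                                          hidden_units=[250, 200, 100, 50],
                                          model_dir="./model_log")
log_regressor.train(input_fn=get_input_fn(log_training_set), steps=8000)

predictions = log_regressor.predict(input_fn=get_input_fn(predict_set, num_epochs=1, shuffle=False))
y_predicted = [float(np.exp(p['predictions'][0])) for p in predictions]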

How to use cv2 image data as batch_x input in tensorflow

I want to use images of my screen as TensorFlow input data.
For recording the images I am using the script below.
The image data is saved as uint8, but TensorFlow needs float32.
So, how do I convert it so that I can feed the image into feed_dict
loss, acc = sess.run([loss_op, accuracy], feed_dict={X: batch_x, Y: batch_y})
as batch_x?
import numpy as np
import tensorflow as ts
import win32gui, win32ui, win32con, win32api
import cv2

def grab_frame(size=None):
    hwin = win32gui.GetDesktopWindow()
    if size:
        left, top, x, y = size
        width = x - left + 1
        height = y - top + 1
    else:
        width = win32api.GetSystemMetrics(win32con.SM_CXVIRTUALSCREEN)
        height = win32api.GetSystemMetrics(win32con.SM_CYVIRTUALSCREEN)
        left = win32api.GetSystemMetrics(win32con.SM_XVIRTUALSCREEN)
        top = win32api.GetSystemMetrics(win32con.SM_YVIRTUALSCREEN)

    hwindc = win32gui.GetWindowDC(hwin)
    srcdc = win32ui.CreateDCFromHandle(hwindc)
    memdc = srcdc.CreateCompatibleDC()
    bmp = win32ui.CreateBitmap()
    bmp.CreateCompatibleBitmap(srcdc, width, height)
    memdc.SelectObject(bmp)
    memdc.BitBlt((0, 0), (width, height), srcdc, (left, top), win32con.SRCCOPY)

    signedIntsArray = bmp.GetBitmapBits(True)
    img = np.fromstring(signedIntsArray, dtype='uint8')
    img.shape = (height, width, 4)

    srcdc.DeleteDC()
    memdc.DeleteDC()
    win32gui.ReleaseDC(hwin, hwindc)
    win32gui.DeleteObject(bmp.GetHandle())

    img_res = cv2.resize(img, (480, 270))
    return img_res
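No answer is recorded here, but the conversion the question asks about is just a dtype cast (and usually a rescale). A minimal sketch; sess, loss_op, accuracy, X, Y and batch_y are assumed to come from the training code that is not shown, and the placeholder X is assumed to expect shape (batch, 270, 480, 3):

frame = grab_frame()                        # uint8, shape (270, 480, 4), BGRA
frame = frame[:, :, :3]                     # drop the alpha channel
batch_x = frame.astype(np.float32) / 255.0  # cast to float32 and scale to [0, 1]
batch_x = np.expand_dims(batch_x, axis=0)   # add the batch dimension

loss, acc = sess.run([loss_op, accuracy], feed_dict={X: batch_x, Y: batch_y})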

Include matplotlib in pyqt5 with hover labels

I have a plot from matplotlib for which I would like to display labels on the marker points when hovering over them with the mouse.
I found this very helpful working example on SO, and I was trying to integrate the exact same plot into a PyQt5 application.
Unfortunately, when the plot is embedded in the application, the hovering doesn't work anymore.
Here is a full working example based on the mentioned SO post:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import scipy.spatial as spatial
import numpy as np
from PyQt5.QtCore import *
from PyQt5.QtGui import *
from PyQt5.QtWidgets import *
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
import sys

pi = np.pi
cos = np.cos

def fmt(x, y):
    return 'x: {x:0.2f}\ny: {y:0.2f}'.format(x=x, y=y)

class FollowDotCursor(object):
    """Display the x,y location of the nearest data point.
    https://stackoverflow.com/a/4674445/190597 (Joe Kington)
    https://stackoverflow.com/a/13306887/190597 (unutbu)
    https://stackoverflow.com/a/15454427/190597 (unutbu)
    """
    def __init__(self, ax, x, y, tolerance=5, formatter=fmt, offsets=(-20, 20)):
        try:
            x = np.asarray(x, dtype='float')
        except (TypeError, ValueError):
            x = np.asarray(mdates.date2num(x), dtype='float')
        y = np.asarray(y, dtype='float')
        mask = ~(np.isnan(x) | np.isnan(y))
        x = x[mask]
        y = y[mask]
        self._points = np.column_stack((x, y))
        self.offsets = offsets
        y = y[np.abs(y - y.mean()) <= 3 * y.std()]
        self.scale = x.ptp()
        self.scale = y.ptp() / self.scale if self.scale else 1
        self.tree = spatial.cKDTree(self.scaled(self._points))
        self.formatter = formatter
        self.tolerance = tolerance
        self.ax = ax
        self.fig = ax.figure
        self.ax.xaxis.set_label_position('top')
        self.dot = ax.scatter(
            [x.min()], [y.min()], s=130, color='green', alpha=0.7)
        self.annotation = self.setup_annotation()
        plt.connect('motion_notify_event', self)

    def scaled(self, points):
        points = np.asarray(points)
        return points * (self.scale, 1)

    def __call__(self, event):
        ax = self.ax
        # event.inaxes is always the current axis. If you use twinx, ax could be
        # a different axis.
        if event.inaxes == ax:
            x, y = event.xdata, event.ydata
        elif event.inaxes is None:
            return
        else:
            inv = ax.transData.inverted()
            x, y = inv.transform([(event.x, event.y)]).ravel()
        annotation = self.annotation
        x, y = self.snap(x, y)
        annotation.xy = x, y
        annotation.set_text(self.formatter(x, y))
        self.dot.set_offsets((x, y))
        bbox = ax.viewLim
        event.canvas.draw()

    def setup_annotation(self):
        """Draw and hide the annotation box."""
        annotation = self.ax.annotate(
            '', xy=(0, 0), ha='right',
            xytext=self.offsets, textcoords='offset points', va='bottom',
            bbox=dict(
                boxstyle='round,pad=0.5', fc='yellow', alpha=0.75),
            arrowprops=dict(
                arrowstyle='->', connectionstyle='arc3,rad=0'))
        return annotation

    def snap(self, x, y):
        """Return the value in self.tree closest to x, y."""
        dist, idx = self.tree.query(self.scaled((x, y)), k=1, p=1)
        try:
            return self._points[idx]
        except IndexError:
            # IndexError: index out of bounds
            return self._points[0]

class MainWindow(QMainWindow):
    def __init__(self):
        super().__init__()
        self.width = 1000
        self.height = 800
        self.setGeometry(0, 0, self.width, self.height)
        canvas = self.get_canvas()
        w = QWidget()
        w.layout = QHBoxLayout()
        w.layout.addWidget(canvas)
        w.setLayout(w.layout)
        self.setCentralWidget(w)
        self.show()

    def get_canvas(self):
        fig, ax = plt.subplots()
        x = np.linspace(0.1, 2 * pi, 10)
        y = cos(x)
        markerline, stemlines, baseline = ax.stem(x, y, '-.')
        plt.setp(markerline, 'markerfacecolor', 'b')
        plt.setp(baseline, 'color', 'r', 'linewidth', 2)
        cursor = FollowDotCursor(ax, x, y, tolerance=20)
        canvas = FigureCanvas(fig)
        return canvas

app = QApplication(sys.argv)
win = MainWindow()
sys.exit(app.exec_())
What would I have to do to make the labels also show when hovering over the plot in the PyQt application?
The first problem may be that you don't keep a reference to the FollowDotCursor.
So, to make sure the FollowDotCursor stays alive, you can make it an instance variable,
self.cursor = FollowDotCursor(ax, x, y, tolerance=20)
instead of cursor = ....
Next, make sure you instantiate the FollowDotCursor after giving the figure a canvas:
canvas = FigureCanvas(fig)
self.cursor = FollowDotCursor(ax, x, y, tolerance=20)
Finally, keep a reference to the callback inside FollowDotCursor and don't use plt.connect; connect through the canvas itself:
self.cid = self.fig.canvas.mpl_connect('motion_notify_event', self)
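Putting those three changes together, get_canvas and the end of FollowDotCursor.__init__ would look roughly like this (a sketch based on the code above, not a full rewrite):

    # In FollowDotCursor.__init__, replace the plt.connect(...) call with:
    #     self.cid = self.fig.canvas.mpl_connect('motion_notify_event', self)

    def get_canvas(self):
        fig, ax = plt.subplots()
        x = np.linspace(0.1, 2 * pi, 10)
        y = cos(x)
        markerline, stemlines, baseline = ax.stem(x, y, '-.')
        plt.setp(markerline, 'markerfacecolor', 'b')
        plt.setp(baseline, 'color', 'r', 'linewidth', 2)
        canvas = FigureCanvas(fig)                             # give the figure a canvas first
        self.cursor = FollowDotCursor(ax, x, y, tolerance=20)  # keep a reference on self
        return canvas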