Implementing contrastive loss function in mxnet - mxnet
I want to train a Siamese net using depth images obtained from a Kinect. I want to use a contrastive loss function to train this network, but I can't find a contrastive loss function in MXNet. My implementation is as follows:
def LossFunc(distance, label, margin):
    distance = distance.reshape(label.shape)
    # term for the pairs selected by `label`
    dis_positive = distance * label
    # hinge term max(margin - distance, 0), built by concatenating with zeros and taking the max
    dis_negative = margin - distance
    zeros = nd.zeros(label.shape, ctx=ctx)
    dis_negative = nd.concat(dis_negative, zeros, dim=1)
    dis_negative = nd.max(dis_negative, axis=1).reshape(label.shape)
    dis_negative = (1 - label) * dis_negative
    return 0.5 * dis_positive**2 + 0.5 * dis_negative**2
Is it right?
Here is an implementation of the contrastive loss using the Gluon API:
class ContrastiveLoss(Loss):
    def __init__(self, margin=2.0, weight=None, batch_axis=0, **kwargs):
        super(ContrastiveLoss, self).__init__(weight, batch_axis, **kwargs)
        self.margin = margin

    def hybrid_forward(self, F, output1, output2, label):
        euclidean_distance = F.sqrt(F.square(output1 - output2))
        loss_contrastive = F.mean((1 - label) * F.square(euclidean_distance) +
                                  label * F.square(F.clip(self.margin - euclidean_distance, 0.0, 10)))
        return loss_contrastive
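For reference, a minimal usage sketch of the class above (the shapes and values are illustrative assumptions, not part of the original post); label 0 marks a similar pair and 1 a dissimilar pair, matching the dataset used further down:

import mxnet as mx

loss_fn = ContrastiveLoss(margin=2.0)
out1 = mx.nd.random.normal(shape=(4, 5))            # embeddings from the first branch
out2 = mx.nd.random.normal(shape=(4, 5))            # embeddings from the second branch
label = mx.nd.array([0, 1, 0, 1]).reshape((4, 1))   # 0 = similar pair, 1 = dissimilar pair
print(loss_fn(out1, out2, label))                   # scalar NDArray with the batch loss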
I implemented it based on a PyTorch example of how to use a Siamese net, taken from here.
There are quite a few differences between PyTorch and MXNet, so if you want to try this one out, here is the full runnable example. You would need to download the AT&T faces data, though, and convert the images to JPEG, because MXNet doesn't support loading .pgm images out of the box.
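As a side note, the .pgm files can be converted with Pillow. This is only a rough sketch under my own assumptions about paths (the archive extracted to ./att_faces, one subject per subfolder); splitting subjects between the training and testing folders is left to you:

import os
from PIL import Image

src_root = "./att_faces"       # assumed location of the extracted AT&T .pgm files
dst_root = "./faces/training"  # layout expected by ImageFolderDataset (one folder per class)

for subdir, _, files in os.walk(src_root):
    for name in files:
        if not name.endswith(".pgm"):
            continue
        person = os.path.basename(subdir)                 # e.g. "s1", "s2", ...
        out_dir = os.path.join(dst_root, person)
        os.makedirs(out_dir, exist_ok=True)
        img = Image.open(os.path.join(subdir, name))      # Pillow reads .pgm natively
        img.save(os.path.join(out_dir, name.replace(".pgm", ".jpeg")))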
import matplotlib.pyplot as plt
import numpy as np
import random
from PIL import Image
import PIL.ImageOps
import mxnet as mx
from mxnet import autograd
from mxnet.base import numeric_types
from mxnet.gluon import nn, HybridBlock, Trainer
from mxnet.gluon.data import DataLoader
from mxnet.gluon.data.vision.datasets import ImageFolderDataset
from mxnet.gluon.loss import Loss
def imshow(img, text=None, should_save=False):
    npimg = img.asnumpy()  # NDArray -> numpy array for matplotlib
    plt.axis("off")
    if text:
        plt.text(75, 8, text, style='italic', fontweight='bold',
                 bbox={'facecolor': 'white', 'alpha': 0.8, 'pad': 10})
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

def show_plot(iteration, loss):
    plt.plot(iteration, loss)
    plt.show()
class Config:
    training_dir = "./faces/training/"
    testing_dir = "./faces/testing/"
    train_batch_size = 5
    train_number_epochs = 100
class SiameseNetworkDataset(ImageFolderDataset):
    def __init__(self, root, transform=None):
        super().__init__(root, flag=0, transform=transform)
        self.root = root
        self.transform = transform

    def __getitem__(self, index):
        items_with_index = list(enumerate(self.items))
        img0_index, img0_tuple = random.choice(items_with_index)
        # we need to make sure approx 50% of images are in the same class
        should_get_same_class = random.randint(0, 1)
        if should_get_same_class:
            while True:
                # keep looping till the same class image is found
                img1_index, img1_tuple = random.choice(items_with_index)
                if img0_tuple[1] == img1_tuple[1]:
                    break
        else:
            img1_index, img1_tuple = random.choice(items_with_index)

        img0 = super().__getitem__(img0_index)
        img1 = super().__getitem__(img1_index)
        # label is 0 for a same-class pair, 1 for a different-class pair
        return img0[0].transpose(), img1[0].transpose(), mx.nd.array([int(img1_tuple[1] != img0_tuple[1])])

    def __len__(self):
        return super().__len__()
class ReflectionPad2D(HybridBlock):
    """Pads the input tensor using the reflection of the input boundary.

    Parameters
    ----------
    padding: int
        An integer padding size

    Shape:
        - Input: :math:`(N, C, H_{in}, W_{in})`
        - Output: :math:`(N, C, H_{out}, W_{out})` where
          :math:`H_{out} = H_{in} + 2 * padding` and
          :math:`W_{out} = W_{in} + 2 * padding`
    """
    def __init__(self, padding=0, **kwargs):
        super(ReflectionPad2D, self).__init__(**kwargs)
        if isinstance(padding, numeric_types):
            padding = (0, 0, 0, 0, padding, padding, padding, padding)
        assert len(padding) == 8
        self._padding = padding

    def hybrid_forward(self, F, x, *args, **kwargs):
        return F.pad(x, mode='reflect', pad_width=self._padding)
class SiameseNetwork(HybridBlock):
    def __init__(self):
        super(SiameseNetwork, self).__init__()

        # convolutional feature extractor shared by both branches
        self.cnn1 = nn.HybridSequential()
        with self.cnn1.name_scope():
            self.cnn1.add(ReflectionPad2D(padding=1))
            self.cnn1.add(nn.Conv2D(in_channels=1, channels=4, kernel_size=3))
            self.cnn1.add(nn.Activation('relu'))
            self.cnn1.add(nn.BatchNorm())

            self.cnn1.add(ReflectionPad2D(padding=1))
            self.cnn1.add(nn.Conv2D(in_channels=4, channels=8, kernel_size=3))
            self.cnn1.add(nn.Activation('relu'))
            self.cnn1.add(nn.BatchNorm())

            self.cnn1.add(ReflectionPad2D(padding=1))
            self.cnn1.add(nn.Conv2D(in_channels=8, channels=8, kernel_size=3))
            self.cnn1.add(nn.Activation('relu'))
            self.cnn1.add(nn.BatchNorm())

        # fully connected head producing the 5-dimensional embedding
        self.fc1 = nn.HybridSequential()
        with self.fc1.name_scope():
            self.fc1.add(nn.Dense(500))
            self.fc1.add(nn.Activation('relu'))
            self.fc1.add(nn.Dense(500))
            self.fc1.add(nn.Activation('relu'))
            self.fc1.add(nn.Dense(5))

    def hybrid_forward(self, F, input1, input2):
        output1 = self._forward_once(input1)
        output2 = self._forward_once(input2)
        return output1, output2

    def _forward_once(self, x):
        output = self.cnn1(x)
        output = self.fc1(output)
        return output
class ContrastiveLoss(Loss):
    def __init__(self, margin=2.0, weight=None, batch_axis=0, **kwargs):
        super(ContrastiveLoss, self).__init__(weight, batch_axis, **kwargs)
        self.margin = margin

    def hybrid_forward(self, F, output1, output2, label):
        euclidean_distance = F.sqrt(F.square(output1 - output2))
        loss_contrastive = F.mean((1 - label) * F.square(euclidean_distance) +
                                  label * F.square(F.clip(self.margin - euclidean_distance, 0.0, 10)))
        return loss_contrastive
def aug_transform(data, label):
    augs = mx.image.CreateAugmenter(data_shape=(1, 100, 100))
    for aug in augs:
        data = aug(data)
    return data, label
def run_training():
    siamese_dataset = SiameseNetworkDataset(root=Config.training_dir, transform=aug_transform)
    train_dataloader = DataLoader(siamese_dataset, shuffle=True, num_workers=1,
                                  batch_size=Config.train_batch_size)
    counter = []
    loss_history = []
    iteration_number = 0

    net = SiameseNetwork()
    net.initialize(init=mx.init.Xavier())
    trainer = Trainer(net.collect_params(), 'adam', {'learning_rate': 0.0005})
    loss = ContrastiveLoss(margin=2.0)

    for epoch in range(0, Config.train_number_epochs):
        for i, data in enumerate(train_dataloader, 0):
            img0, img1, label = data

            with autograd.record():
                output1, output2 = net(img0, img1)
                loss_contrastive = loss(output1, output2, label)

            loss_contrastive.backward()
            trainer.step(Config.train_batch_size)

            if i % 10 == 0:
                print("Epoch number {}\n Current loss {}\n".format(epoch, loss_contrastive))
                iteration_number += 10
                counter.append(iteration_number)
                loss_history.append(loss_contrastive)

    # show_plot(counter, loss_history)
    return net
def run_predict(net):
    folder_dataset_test = SiameseNetworkDataset(root=Config.testing_dir, transform=aug_transform)
    test_dataloader = DataLoader(folder_dataset_test, shuffle=True, num_workers=1,
                                 batch_size=Config.train_batch_size)

    dataiter = iter(test_dataloader)
    x0, _, _ = next(dataiter)
    _, x1, label2 = next(dataiter)

    output1, output2 = net(x0, x1)
    euclidean_distance = mx.ndarray.sqrt(mx.ndarray.square(output1 - output2))
    print('x0 vs x1 dissimilarity is {}'.format(euclidean_distance[0][0]))
if __name__ == '__main__':
    net = run_training()
    run_predict(net)
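Before kicking off the full training run, a quick shape smoke test can confirm the network wires up; this is just a sketch, with the 1x100x100 grayscale input size taken from the aug_transform data_shape above:

net = SiameseNetwork()
net.initialize(init=mx.init.Xavier())
a = mx.nd.random.normal(shape=(2, 1, 100, 100))  # two fake single-channel images
b = mx.nd.random.normal(shape=(2, 1, 100, 100))
o1, o2 = net(a, b)
print(o1.shape, o2.shape)                        # both should be (2, 5) embeddings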