Unable to detect multiple faces at a time - tensorflow
For some reason, I'm not able to detect multiple faces at a time; only one face is detected at any given moment. How do I resolve this issue? I've added the code below. I've used Google's FaceNet for real-time face recognition.
In the video output it draws a bounding box on only one face at a time, but the console output correctly counts that two or more faces are present.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from scipy import misc
import cv2
import matplotlib.pyplot as plt
import numpy as np
import argparse
import facenet
import detect_face
import os
from os.path import join as pjoin
import sys
import time
import copy
import math
import pickle
from sklearn.svm import SVC
from sklearn.externals import joblib
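# note (editor): sklearn.externals.joblib has been removed in recent scikit-learn releases;
# on a modern install, use "import joblib" directly instead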
# added
#import reload
#reload(sys)
#sys.setdefaultencoding('utf8')
print('Creating networks and loading parameters')
with tf.Graph().as_default():
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
log_device_placement=False))
with sess.as_default():
pnet, rnet, onet = detect_face.create_mtcnn(sess, './') #face detection
minsize = 20 # minimum face size; minsize, threshold and factor are used for detection
threshold = [0.6, 0.7, 0.7] # threshold for each of the three MTCNN stages
factor = 0.709 # scale factor between image pyramid levels
margin = 44
frame_interval = 3
batch_size = 1000
image_size = 182
input_image_size = 160
items = os.listdir("/Aryabhatta Robotics Internship/facenet-master/Real_time_face/ids/aligned")
#HumanNames = []
#for names in items:
#HumanNames.append(names)
#print(HumanNames)
#HumanNames = ['Alok','Siddhant','tesra','s01','s02','s03','s04','s05','s06','s07','s08','s09','s10','s11','s12','s13','s14','s15','s16','s17','s18','s19','s20'] #train human name, known face names
print('Loading feature extraction model')
modeldir = '/Aryabhatta Robotics Internship/facenet-master/Real_time_face/models/20180402-114759/20180402-114759.pb' # feature extraction model
facenet.load_model(modeldir)
images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
embedding_size = embeddings.get_shape()[1]
classifier_filename = '/Aryabhatta Robotics Internship/facenet-master/Real_time_face/models/my_classifier/my_classifier.pkl' # our own classifier
classifier_filename_exp = os.path.expanduser(classifier_filename)
with open(classifier_filename_exp, 'rb') as infile:
(model, class_names) = pickle.load(infile)#, encoding='latin1')
print('load classifier file-> %s' % classifier_filename_exp)
video_capture = cv2.VideoCapture(0)
c = 0
# #video writer
# fourcc = cv2.VideoWriter_fourcc(*'DIVX')
# out = cv2.VideoWriter('3F_0726.avi', fourcc, fps=30, frameSize=(640,480))
print('Start Recognition!')
prevTime = 0
while True: #infinite loop
ret, frame = video_capture.read() #video capture from webcam
frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5) #resize frame (optional)
curTime = time.time() # calc fps
timeF = frame_interval
if (c % timeF == 0):
find_results = []
if frame.ndim == 2:
frame = facenet.to_rgb(frame)
frame = frame[:, :, 0:3]
bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor)
nrof_faces = bounding_boxes.shape[0]
print('Detected_FaceNum: %d' % nrof_faces)
if nrof_faces > 0:
det = bounding_boxes[:, 0:4]
img_size = np.asarray(frame.shape)[0:2]
cropped = []
scaled = []
scaled_reshape = []
bb = np.zeros((nrof_faces,4), dtype=np.int32)
for i in range(nrof_faces):
print("faceno:" + str(i))
emb_array = np.zeros((1, embedding_size))
bb[i][0] = det[i][0]
bb[i][1] = det[i][1]
bb[i][2] = det[i][2]
bb[i][3] = det[i][3]
# inner exception
if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
print('face is out of range!')
continue
cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
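# NOTE (editor): the lines below always index element 0 (cropped[0], scaled[0], scaled_reshape[0])
# instead of element i, and the inner prediction/drawing loops reuse the loop variable i, so each
# pass re-embeds the first face and redraws its box; this is why only one face ever gets a bounding box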
cropped[0] = facenet.flip(cropped[0], False)
scaled.append(misc.imresize(cropped[0], (image_size, image_size), interp='bilinear'))
scaled[0] = cv2.resize(scaled[0], (input_image_size,input_image_size),
interpolation=cv2.INTER_CUBIC)
scaled[0] = facenet.prewhiten(scaled[0])
scaled_reshape.append(scaled[0].reshape(-1,input_image_size,input_image_size,3))
feed_dict = {images_placeholder: scaled_reshape[0], phase_train_placeholder: False}
emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
#print(emb_array)
threshold_accuracy = 155
predictions = model.predict_proba(emb_array)
#print(predictions)
for i in range(len(predictions[0])):
predictions[0][i] = np.exp(18*predictions[0][i])
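# NOTE (editor): this exponential rescaling pushes the scores far above 1, which is why
# threshold_accuracy is set to 155 here instead of a probability in [0, 1]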
#print(predictions)
best_class_indices = np.argmax(predictions, axis=1)
print(best_class_indices)
print("next")
best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
print(best_class_probabilities)
for i in range(len(best_class_indices)):
print('%4d %s: %.3f' % (i, class_names[best_class_indices[i]], best_class_probabilities[i]))
cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2) #boxing face
#plot result idx under box
text_x = bb[i][0]
text_y = bb[i][3] + 20
# print('result: ', best_class_indices[0])
if best_class_probabilities[i] > threshold_accuracy :
#result_names = HumanNames[best_class_indices[0]]
cv2.putText(frame, class_names[best_class_indices[i]], (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
1, (0, 0, 255), thickness=1, lineType=2)
else:
cv2.putText(frame, 'Unknown', (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
1, (0, 0, 255), thickness=1, lineType=2)
#for H_i in HumanNames:
#if HumanNames[best_class_indices[0]] == H_i and best_class_probabilities[0] > threshold_accuracy :
#flag = 1
#result_names = HumanNames[best_class_indices[0]]
#cv2.putText(frame, result_names, (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
#1, (0, 0, 255), thickness=1, lineType=2)
#else:
#cv2.putText(frame, 'Unknown', (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
# 1, (0, 0, 255), thickness=1, lineType=2)
else:
print('Unable to align')
sec = curTime - prevTime
prevTime = curTime
fps = 1 / (sec)
str1 = 'FPS: %2.3f' % fps
text_fps_x = len(frame[0]) - 150
text_fps_y = 20
cv2.putText(frame, str1, (text_fps_x, text_fps_y),
cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 0), thickness=1, lineType=2)
# c+=1
cv2.imshow('Video', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
video_capture.release()
# #video writer
# out.release()
cv2.destroyAllWindows()
Threshold accuracy should be between 0 and 1, since model.predict_proba returns probabilities; make sure your threshold accuracy is set to something like 0.60 or higher rather than 155. Also note that the working example below indexes the per-face buffers with i (cropped[i], scaled[i], scaled_reshape[i]) instead of always using element 0, which is what lets it process and box every detected face.
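For instance, a minimal sketch of the per-face check, reusing the question's variable names (the 0.60 value is illustrative):

threshold_accuracy = 0.60  # predict_proba outputs lie in [0, 1], so compare on that scale
if best_class_probabilities[i] > threshold_accuracy:
    result_names = class_names[best_class_indices[i]]  # confident match
else:
    result_names = 'Unknown'  # score below threshold

The full runnable example: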
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from scipy import misc
from skimage.transform import resize
import cv2
import numpy as np
import facenet
import detect_face
import os
import time
import pickle
import sys
img_path='download.jpeg'
modeldir = './model/20170511-185253.pb'
classifier_filename = './class/classifier.pkl'
npy='./npy'
train_img="./train_img"
with tf.Graph().as_default():
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
with sess.as_default():
pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)
minsize = 10 # minimum size of face
threshold = [0.6, 0.7, 0.7]  # threshold for each of the three MTCNN stages
factor = 1 # scale factor
margin = 44
frame_interval = 3
batch_size = 1000
image_size = 182
input_image_size = 160
HumanNames = os.listdir(train_img)
HumanNames.sort()
print('Loading feature extraction model')
facenet.load_model(modeldir)
images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
embedding_size = embeddings.get_shape()[1]
classifier_filename_exp = os.path.expanduser(classifier_filename)
with open(classifier_filename_exp, 'rb') as infile:
(model, class_names) = pickle.load(infile)
# video_capture = cv2.VideoCapture("akshay_mov.mp4")
c = 0
print('Start Recognition!')
prevTime = 0
# ret, frame = video_capture.read()
frame = cv2.imread(img_path,0)
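# flag 0 makes imread return a single-channel grayscale image; the frame.ndim == 2 branch
# below converts it back to RGB before running detection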
frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5) #resize frame (optional)
curTime = time.time()+1 # calc fps
timeF = frame_interval
if (c % timeF == 0):
find_results = []
if frame.ndim == 2:
frame = facenet.to_rgb(frame)
frame = frame[:, :, 0:3]
print(1)
bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor)
nrof_faces = bounding_boxes.shape[0]
print('Face Detected: %d' % nrof_faces)
if nrof_faces > 0:
det = bounding_boxes[:, 0:4]
img_size = np.asarray(frame.shape)[0:2]
cropped = []
scaled = []
scaled_reshape = []
bb = np.zeros((nrof_faces,4), dtype=np.int32)
for i in range(nrof_faces):
emb_array = np.zeros((1, embedding_size))
bb[i][0] = det[i][0]
bb[i][1] = det[i][1]
bb[i][2] = det[i][2]
bb[i][3] = det[i][3]
# inner exception
if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
print('face is too close')
continue
cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
cropped[i] = facenet.flip(cropped[i], False)
scaled.append(resize(cropped[i], (image_size, image_size), anti_aliasing=True))
scaled[i] = cv2.resize(scaled[i], (input_image_size,input_image_size),
interpolation=cv2.INTER_CUBIC)
scaled[i] = facenet.prewhiten(scaled[i])
scaled_reshape.append(scaled[i].reshape(-1,input_image_size,input_image_size,3))
feed_dict = {images_placeholder: scaled_reshape[i], phase_train_placeholder: False}
emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
predictions = model.predict_proba(emb_array)
print(predictions)
best_class_indices = np.argmax(predictions, axis=1)
# print(best_class_indices)
best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
print(best_class_probabilities)
cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2) #boxing face
#plot result idx under box
text_x = bb[i][0]
text_y = bb[i][3] + 20
print('Result Indices: ', best_class_indices[0])
print(HumanNames)
for H_i in HumanNames:
# print(H_i)
if HumanNames[best_class_indices[0]] == H_i:
result_names = HumanNames[best_class_indices[0]]
else:
print('Unable to align')
cv2.imshow('Image', frame)
if cv2.waitKey(100) & 0xFF == ord('q'):
sys.exit("Thanks")
cv2.destroyAllWindows()
Related
RuntimeError: The size of tensor a (49) must match the size of tensor b (64) at non-singleton dimension 1
I have been working on attention maps for Swin Transformers. Below is my implementation:

import torch
from PIL import Image
import numpy
import sys
from torchvision import transforms
import numpy as np
import cv2

def rollout(attentions, discard_ratio, head_fusion):
    result = torch.eye(attentions[0].size(-1))
    with torch.no_grad():
        for attention in attentions:
            # print(attentions)
            if head_fusion == "mean":
                attention_heads_fused = attention.mean(axis=1)
            elif head_fusion == "max":
                attention_heads_fused = attention.max(axis=1)[0]
            elif head_fusion == "min":
                attention_heads_fused = attention.min(axis=1)[0]
            else:
                raise "Attention head fusion type Not supported"
            # Drop the lowest attentions, but
            # don't drop the class token
            flat = attention_heads_fused.view(attention_heads_fused.size(0), -1)
            # print(flat)
            _, indices = flat.topk(int(flat.size(-1)*discard_ratio), -1, False)
            # print("_ : ",_," indices : ",indices)
            indices = indices[indices != 0]
            flat[0, indices] = 0
            I = torch.eye(attention_heads_fused.size(-1))
            # print("I : ",I)
            a = (attention_heads_fused + 1.0*I)/2
            # print("a : ",a)
            # print(a.size())
            print(a.sum(dim=-1))
            a = a / a.sum(dim=-1)
            result = torch.matmul(a, result)
            # print("result : ",result)
    # Look at the total attention between the class token,
    # and the image patches
    mask = result[0, 0, 1:]
    # In case of 224x224 image, this brings us from 196 to 14
    width = int(mask.size(-1)**0.5)
    mask = mask.reshape(width, width).numpy()
    mask = mask / np.max(mask)
    return mask

class VITAttentionRollout:
    def __init__(self, model, attention_layer_name='dropout', head_fusion="mean", discard_ratio=0.9):
        self.model = model
        self.head_fusion = head_fusion
        self.discard_ratio = discard_ratio
        # print(self.model.named_modules())
        for name, module in self.model.named_modules():
            # print("Name : ",name," Module : ",module)
            if attention_layer_name in name:
                module.register_forward_hook(self.get_attention)
        # print(self.attentions)
        self.attentions = []

    def get_attention(self, module, input, output):
        self.attentions.append(output.cpu())

    def __call__(self, input_tensor):
        self.attentions = []
        with torch.no_grad():
            output = self.model(**input_tensor)
            # print(output)
        return rollout(self.attentions, self.discard_ratio, self.head_fusion)

This is the main program:

import sys
import torch
from PIL import Image
from torchvision import transforms
import numpy as np
import cv2
from google.colab.patches import cv2_imshow
# from vit_rollout import VITAttentionRollout
from vit_grad_rollout import VITAttentionGradRollout

def show_mask_on_image(img, mask):
    img = np.float32(img) / 255
    heatmap = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET)
    heatmap = np.float32(heatmap) / 255
    cam = heatmap + np.float32(img)
    cam = cam / np.max(cam)
    return np.uint8(255 * cam)

if __name__ == '__main__':
    model.eval()
    image_path = '/content/both.jpg'
    category_index = None
    head_fusion = 'max'
    discard_ratio = 0.9
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])
    img = Image.open(image_path)
    img = img.resize((224, 224))
    input_tensor = feature_extractor(img, return_tensors="pt")
    # print(input_tensor)
    if category_index is None:
        print("Doing Attention Rollout")
        attention_rollout = VITAttentionRollout(model, head_fusion=head_fusion, discard_ratio=discard_ratio)
        mask = attention_rollout(input_tensor)
        name = "attention_rollout_{:.3f}_{}.png".format(discard_ratio, head_fusion)
    else:
        print("Doing Gradient Attention Rollout")
        grad_rollout = VITAttentionGradRollout(model, discard_ratio=discard_ratio)
        mask = grad_rollout(input_tensor, category_index)
        name = "grad_rollout_{}_{:.3f}_{}.png".format(category_index, discard_ratio, head_fusion)

    np_img = np.array(img)[:, :, ::-1]
    mask = cv2.resize(mask, (np_img.shape[1], np_img.shape[0]))
    mask = show_mask_on_image(np_img, mask)
    cv2_imshow(np_img)
    cv2_imshow(mask)
    cv2.imwrite("input.jpg", np_img)
    cv2.imwrite(name, mask)
    cv2.waitKey(-1)

I am referring to the git project https://github.com/jacobgil/vit-explain, but I am getting the error:

RuntimeError: The size of tensor a (49) must match the size of tensor b (64) at non-singleton dimension 1

I researched some git projects, but there is very little information on Swin Transformers. Is there any way I can make an attention map for Swin Transformer models? Thanks in advance.
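One plausible source of the shape mismatch (a hedged sketch, not a confirmed diagnosis): Swin computes attention inside 7x7 windows, so each attention map covers 49 tokens and there is no [CLS] token, while the rollout code above assumes ViT-style global attention with a class token at index 0 and a square patch grid. A toy illustration of how those assumptions break:

import torch

attn = torch.rand(1, 49, 49)         # one Swin window: 7*7 = 49 tokens, no [CLS] token
mask = attn[0, 0, 1:]                # dropping a "class token" that does not exist leaves 48 values
width = int(mask.size(-1) ** 0.5)    # int(sqrt(48)) == 6
print(width * width, mask.size(-1))  # 36 != 48, so mask.reshape(width, width) would fail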
Using photoshop to complete undersampling in tensorflow object detection?
I'm currently training an object detection model using TensorFlow and I ran into a problem: I don't have enough samples to train my model effectively, and it will take me a long time to get more. Would it be a good idea to create the remaining samples using Photoshop, or will I run into issues with that approach?
You have many options:

imgaug
albumentations
Augmentor
OpenCV: Image-Augmentation-Using-OpenCV-and-Python-Github-Repo

Example code I have used before:

import numpy as np
import cv2 as cv
import imutils

def data_augmentation(img, min_rot_angle=-180, max_rot_angle=180, crop_ratio=0.2,
                      smooth_size=3, sharp_val=3, max_noise_scale=10):
    (H, W) = img.shape[:2]
    img_a = img
    all_func = ['flip', 'rotate', 'crop', 'smooth', 'sharp', 'noise']
    do_func = np.random.choice(all_func, size=np.random.randint(1, len(all_func)), replace=False)
    #do_func = ['crop']
    # Flip image, 0: vertically, 1: horizontally
    if 'flip' in do_func:
        img_a = cv.flip(img_a, np.random.choice([0, 1]))
    # Rotate image
    if 'rotate' in do_func:
        rot_ang = np.random.uniform(min_rot_angle, max_rot_angle)
        img_a = imutils.rotate_bound(img_a, rot_ang)
    # Crop image
    if 'crop' in do_func:
        (H_A, W_A) = img_a.shape[:2]
        start_x = np.random.randint(0, int(H_A * crop_ratio))
        start_y = np.random.randint(0, int(W_A * crop_ratio))
        end_x = np.random.randint(int(H_A * (1 - crop_ratio)), H_A)
        end_y = np.random.randint(int(W_A * (1 - crop_ratio)), W_A)
        img_a = img_a[start_x:end_x, start_y:end_y]
    # Smoothing
    if 'smooth' in do_func:
        img_a = cv.GaussianBlur(img_a, (smooth_size, smooth_size), 0)
    # Sharpening
    if 'sharp' in do_func:
        de_sharp_val = -(sharp_val - 1) / 8
        kernel = np.array([[de_sharp_val] * 3,
                           [de_sharp_val, sharp_val, de_sharp_val],
                           [de_sharp_val] * 3])
        img_a = cv.filter2D(img_a, -1, kernel)
    # Add Gaussian noise to the image
    if 'noise' in do_func:
        noise_scale = np.random.uniform(0, max_noise_scale)
        gauss = np.random.normal(0, noise_scale, img_a.size)
        gauss = np.float32(gauss.reshape(img_a.shape[0], img_a.shape[1], img_a.shape[2]))
        img_a = cv.add(img_a, gauss)
    # Keep shape
    img_a = cv.resize(img_a, (W, H))
    return np.float32(img_a)

Others: you can do data augmentation with just TensorFlow! More in this blog: Data Augmentation in Python: Everything You Need to Know
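If you want the pure-TensorFlow route, here is a minimal sketch using standard tf.image ops (assuming TF 2.x, which is not what the rest of this page uses):

import tensorflow as tf

def tf_augment(image):
    # random geometric and photometric jitter with built-in tf.image ops
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_brightness(image, max_delta=0.2)
    image = tf.image.random_contrast(image, lower=0.8, upper=1.2)
    return image

# typically applied on the fly inside a tf.data input pipeline:
# dataset = dataset.map(lambda img, label: (tf_augment(img), label))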
why does tf.estimator.DNNRegressor predict negative y value?
It is strange that the predict() function in tf.estimator.DNNRegressor predicts negative y values even though the training dataset contains no negative y values. I found this when I reduced y by a factor of 1000: if y was 12000 before, it is now 12. The range of y is now [3, 400], but after this change the predict() function outputs some negative values. I didn't set the activation function in tf.estimator.DNNRegressor, so the default activation function is relu, whose range is [0, max). So why does it predict negative values? Is this a bug in tf.estimator.DNNRegressor, or is no activation function applied to y? Thank you. The code is:

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import itertools
import pandas as pd
import tensorflow as tf
from sklearn import datasets, metrics
import csv

tf.logging.set_verbosity(tf.logging.INFO)

COLUMNS = ["col1","col2","col3","col4","col5","col6","col7","col8","col9","col10","col11","col12","col13","col14","col15","col16","col17","col18","col19","col20","col21","col22","col23","col24","col25","col26","col27","col28","col29","col30","col31","col32","col33","col34","col35","col36","col37","col38","col39","col40","col41","col42","col43","col44","col45","col46","col47","col48","col49","col50","col51","col52","col53","col54","col55","col56","col57","col58","col59","col60","col61","col62","col63","col64","col65","col66","col67","col68","col69","col70","col71","col72","col73","col74","col75","col76","col77","col78","col79","col80","col81","col82","col83","col84","col85","col86","col87","col88","col89","col90","col91","col92","col93","col94","col95","col96","col97","col98","col99","col100","col101","col102","col103","col104","col105","col106","col107","col108","col109","col110","col111","col112","col113","col114","col115","col116","col117","col118","col119","col120","col121","col122","col123","col124","col125","col126","col127","col128","col129","col130","col131","col132","col133","col134","col135","col136","col137","col138","col139","col140","col141","col142","col143","col144","col145","col146","col147","col148","col149","col150","col151","col152","col153","col154","col155","col156","col157","col158","col159","col160","col161","col162","col163","col164","col165","col166","col167","col168","col169","col170","col171","col172","col173","col174","col175","col176","col177","col178","col179","col180","col181","col182","col183","col184","col185","col186","col187","col188","col189","col190","col191","col192","col193","col194","col195","col196","col197","col198","col199","col200","col201","col202","col203","col204","col205","col206","col207","col208","col209","col210","col211","col212","col213","col214"]
FEATURES = ["col1","col2","col3","col4","col5","col6","col7","col8","col9","col10","col11","col12","col13","col14","col15","col16","col17","col18","col19","col20","col21","col22","col23","col24","col25","col26","col27","col28","col29","col30","col31","col32","col33","col34","col35","col36","col37","col38","col39","col40","col41","col42","col43","col44","col45","col46","col47","col48","col49","col50","col51","col52","col53","col54","col55","col56","col57","col58","col59","col60","col61","col62","col63","col64","col65","col66","col67","col68","col69","col70","col71","col72","col73","col74","col75","col76","col77","col78","col79","col80","col81","col82","col83","col84","col85","col86","col87","col88","col89","col90","col91","col92","col93","col94","col95","col96","col97","col98","col99","col100","col101","col102","col103","col104","col105","col106","col107","col108","col109","col110","col111","col112","col113","col114","col115","col116","col117","col118","col119","col120","col121","col122","col123","col124","col125","col126","col127","col128","col129","col130","col131","col132","col133","col134","col135","col136","col137","col138","col139","col140","col141","col142","col143","col144","col145","col146","col147","col148","col149","col150","col151","col152","col153","col154","col155","col156","col157","col158","col159","col160","col161","col162","col163","col164","col165","col166","col167","col168","col169","col170","col171","col172","col173","col174","col175","col176","col177","col178","col179","col180","col181","col182","col183","col184","col185","col186","col187","col188","col189","col190","col191","col192","col193","col194","col195","col196","col197","col198","col199","col200","col201","col202","col203","col204","col205","col206","col207","col208","col209","col211","col212","col213"]
LABEL = "col214"

def get_input_fn(data_set, num_epochs=None, shuffle=True):
    return tf.estimator.inputs.pandas_input_fn(
        x=pd.DataFrame({k: data_set[k].values for k in FEATURES}),
        y=pd.Series(data_set[LABEL].values),
        num_epochs=num_epochs,
        shuffle=shuffle)

def get_mae(y_pre, y_target):
    absError = []
    for i in range(len(y_pre)):
        absError.append(abs(y_pre[i] - y_target[i]))
    return sum(absError) / len(absError)

def get_mse(y_pre, y_target):
    squaredError = []
    for i in range(len(y_pre)):
        val = y_pre[i] - y_target[i]
        squaredError.append(val * val)
    return sum(squaredError) / len(squaredError)

training_set = pd.read_csv("train.csv", skipinitialspace=True, skiprows=1, names=COLUMNS)
test_set = pd.read_csv("test.csv", skipinitialspace=True, skiprows=1, names=COLUMNS)
predict_set = pd.read_csv("predict.csv", skipinitialspace=True, skiprows=1, names=COLUMNS)

feature_cols = [tf.feature_column.numeric_column(k) for k in FEATURES]
regressor = tf.estimator.DNNRegressor(feature_columns=feature_cols,
                                      hidden_units=[250, 200, 100, 50],
                                      model_dir="./model")
regressor.train(input_fn=get_input_fn(training_set), steps=8000)
ev = regressor.evaluate(input_fn=get_input_fn(test_set, num_epochs=1, shuffle=False))
loss_score = ev["loss"]
print("Loss: {0:f}".format(loss_score))

predict = regressor.predict(input_fn=get_input_fn(predict_set, num_epochs=1, shuffle=False))
y_predict = predict_set[LABEL].values.tolist()
print(type(y_predict))
print(y_predict)
list_predict = list(predict)
print(type(list_predict))
y_predicted = []
for i in range(len(list_predict)):
    y_predicted.append(list_predict[i]['predictions'][0])
print(y_predicted)

fileObject = open('time_prediction.txt', 'w')
for time in y_predicted:
    fileObject.write(str(time))
    fileObject.write('\n')
fileObject.close()

mae = get_mae(y_predict, y_predicted)
mse = get_mse(y_predict, y_predicted)
print("Mean Absolute Error:" + str(mae) + " Mean Squared Error:" + str(mse))
#mae = tf.metrics.mean_absolute_error(y_predict, list_predict)
#print(mea)

These are 3 data records from the dataset:

2399.998,4,100,100,0,0,1,10,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22,0,0,2,44,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,3,3,1,0,0,0,5,0,0,0,0,2,0,0,1,4,13,4,0,11,14,15,10,8,0,0,3,1,0,0,0,0,0,0,0,0,0,0,1,364,123428,1397595,16772133,56,103,16772153,22,22,11
1919.9984,2,30,30,0,0,1,10,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,38,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,3,3,1,0,0,0,5,0,0,0,0,2,0,0,0,0,12,2,0,9,14,10,9,2,0,0,2,1,0,0,0,0,0,0,0,0,0,0,1,17525535,34347257,1397595,5590711,16698805,103,5913257,896853,1190468,25
479.9996,2,60,60,0,0,1,10,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,38,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,3,3,1,0,0,0,5,0,0,0,0,2,0,0,0,0,12,2,0,9,14,10,9,2,0,0,2,1,0,0,0,0,0,0,0,0,0,0,1,17525535,34347257,1397595,5590711,16698805,103,5913257,896853,1190468,168

The last column is y.
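A hedged note on the behaviour described above: in the canned DNNRegressor, relu applies only to the hidden layers, while the regression head is a plain linear layer, so the output is an unbounded real number and can go negative regardless of the label range. A minimal sketch of the equivalent forward pass (TF 1.x layers API, illustrative only, not the estimator's actual source):

import tensorflow as tf

def dnn_regressor_forward(features, hidden_units=(250, 200, 100, 50)):
    net = features
    for units in hidden_units:
        net = tf.layers.dense(net, units, activation=tf.nn.relu)  # hidden layers use relu
    # regression head: no activation, so predictions are unbounded and may be negative
    return tf.layers.dense(net, 1, activation=None)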
Implementing contrastive loss function in mxnet
I want to train a siamese network using depth images obtained from a Kinect. I want to use a contrastive loss function to train this network, but I can't find a contrastive loss function in mxnet. My implementation is as follows:

def LossFunc(distance, label, margin):
    distance = distance.reshape(label.shape)
    dis_positive = distance * label
    dis_negative = margin - distance
    zeros = nd.zeros(label.shape, ctx=ctx)
    dis_negative = nd.concat(dis_negative, zeros, dim=1)
    dis_negative = nd.max(dis_negative, axis=1).reshape(label.shape)
    dis_negative = (1-label) * dis_negative
    return 0.5 * dis_positive**2 + 0.5 * dis_negative**2

Is it right?
Here is an implementation of the contrastive loss using the Gluon API:

class ContrastiveLoss(Loss):
    def __init__(self, margin=2.0, weight=None, batch_axis=0, **kwargs):
        super(ContrastiveLoss, self).__init__(weight, batch_axis, **kwargs)
        self.margin = margin

    def hybrid_forward(self, F, output1, output2, label):
        euclidean_distance = F.sqrt(F.square(output1 - output2))
        loss_contrastive = F.mean((1 - label) * F.square(euclidean_distance) +
                                  label * F.square(F.clip(self.margin - euclidean_distance, 0.0, 10)))
        return loss_contrastive

I implemented it based on a PyTorch example of how to use a siamese net, taken from here. There are quite a few differences between PyTorch and MxNet, so if you want to try this out, here is the full runnable example. You would need to download the AT&T faces data, though, and convert the images to jpeg, as mxnet doesn't support loading .pgm images out of the box.

import matplotlib.pyplot as plt
import numpy as np
import random
from PIL import Image
import PIL.ImageOps
import mxnet as mx
from mxnet import autograd
from mxnet.base import numeric_types
from mxnet.gluon import nn, HybridBlock, Trainer
from mxnet.gluon.data import DataLoader
from mxnet.gluon.data.vision.datasets import ImageFolderDataset
from mxnet.gluon.loss import Loss

def imshow(img, text=None, should_save=False):
    npimg = img.numpy()
    plt.axis("off")
    if text:
        plt.text(75, 8, text, style='italic', fontweight='bold',
                 bbox={'facecolor': 'white', 'alpha': 0.8, 'pad': 10})
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

def show_plot(iteration, loss):
    plt.plot(iteration, loss)
    plt.show()

class Config:
    training_dir = "./faces/training/"
    testing_dir = "./faces/testing/"
    train_batch_size = 5
    train_number_epochs = 100

class SiameseNetworkDataset(ImageFolderDataset):
    def __init__(self, root, transform=None):
        super().__init__(root, flag=0, transform=transform)
        self.root = root
        self.transform = transform

    def __getitem__(self, index):
        items_with_index = list(enumerate(self.items))
        img0_index, img0_tuple = random.choice(items_with_index)
        # we need to make sure approx 50% of images are in the same class
        should_get_same_class = random.randint(0, 1)
        if should_get_same_class:
            while True:
                # keep looping till the same class image is found
                img1_index, img1_tuple = random.choice(items_with_index)
                if img0_tuple[1] == img1_tuple[1]:
                    break
        else:
            img1_index, img1_tuple = random.choice(items_with_index)
        img0 = super().__getitem__(img0_index)
        img1 = super().__getitem__(img1_index)
        return img0[0].transpose(), img1[0].transpose(), mx.nd.array(mx.nd.array([int(img1_tuple[1] != img0_tuple[1])]))

    def __len__(self):
        return super().__len__()

class ReflectionPad2D(HybridBlock):
    """Pads the input tensor using the reflection of the input boundary.

    Parameters
    ----------
    padding: int
        An integer padding size

    Shape:
        - Input: :math:`(N, C, H_{in}, W_{in})`
        - Output: :math:`(N, C, H_{out}, W_{out})` where
          :math:`H_{out} = H_{in} + 2 * padding`
          :math:`W_{out} = W_{in} + 2 * padding`
    """
    def __init__(self, padding=0, **kwargs):
        super(ReflectionPad2D, self).__init__(**kwargs)
        if isinstance(padding, numeric_types):
            padding = (0, 0, 0, 0, padding, padding, padding, padding)
        assert(len(padding) == 8)
        self._padding = padding

    def hybrid_forward(self, F, x, *args, **kwargs):
        return F.pad(x, mode='reflect', pad_width=self._padding)

class SiameseNetwork(HybridBlock):
    def __init__(self):
        super(SiameseNetwork, self).__init__()
        self.cnn1 = nn.HybridSequential()
        with self.cnn1.name_scope():
            self.cnn1.add(ReflectionPad2D(padding=1))
            self.cnn1.add(nn.Conv2D(in_channels=1, channels=4, kernel_size=3))
            self.cnn1.add(nn.Activation('relu'))
            self.cnn1.add(nn.BatchNorm())
            self.cnn1.add(ReflectionPad2D(padding=1))
            self.cnn1.add(nn.Conv2D(in_channels=4, channels=8, kernel_size=3))
            self.cnn1.add(nn.Activation('relu'))
            self.cnn1.add(nn.BatchNorm())
            self.cnn1.add(ReflectionPad2D(padding=1))
            self.cnn1.add(nn.Conv2D(in_channels=8, channels=8, kernel_size=3))
            self.cnn1.add(nn.Activation('relu'))
            self.cnn1.add(nn.BatchNorm())
        self.fc1 = nn.HybridSequential()
        with self.fc1.name_scope():
            self.cnn1.add(nn.Dense(500)),
            self.cnn1.add(nn.Activation('relu')),
            self.cnn1.add(nn.Dense(500)),
            self.cnn1.add(nn.Activation('relu')),
            self.cnn1.add(nn.Dense(5))

    def hybrid_forward(self, F, input1, input2):
        output1 = self._forward_once(input1)
        output2 = self._forward_once(input2)
        return output1, output2

    def _forward_once(self, x):
        output = self.cnn1(x)
        #output = output.reshape((output.shape[0],))
        output = self.fc1(output)
        return output

class ContrastiveLoss(Loss):
    def __init__(self, margin=2.0, weight=None, batch_axis=0, **kwargs):
        super(ContrastiveLoss, self).__init__(weight, batch_axis, **kwargs)
        self.margin = margin

    def hybrid_forward(self, F, output1, output2, label):
        euclidean_distance = F.sqrt(F.square(output1 - output2))
        loss_contrastive = F.mean((1 - label) * F.square(euclidean_distance) +
                                  label * F.square(F.clip(self.margin - euclidean_distance, 0.0, 10)))
        return loss_contrastive

def aug_transform(data, label):
    augs = mx.image.CreateAugmenter(data_shape=(1, 100, 100))
    for aug in augs:
        data = aug(data)
    return data, label

def run_training():
    siamese_dataset = SiameseNetworkDataset(root=Config.training_dir, transform=aug_transform)
    train_dataloader = DataLoader(siamese_dataset, shuffle=True, num_workers=1,
                                  batch_size=Config.train_batch_size)
    counter = []
    loss_history = []
    iteration_number = 0
    net = SiameseNetwork()
    net.initialize(init=mx.init.Xavier())
    trainer = Trainer(net.collect_params(), 'adam', {'learning_rate': 0.0005})
    loss = ContrastiveLoss(margin=2.0)
    for epoch in range(0, Config.train_number_epochs):
        for i, data in enumerate(train_dataloader, 0):
            img0, img1, label = data
            with autograd.record():
                output1, output2 = net(img0, img1)
                loss_contrastive = loss(output1, output2, label)
            loss_contrastive.backward()
            trainer.step(Config.train_batch_size)
            if i % 10 == 0:
                print("Epoch number {}\n Current loss {}\n".format(epoch, loss_contrastive))
                iteration_number += 10
                counter.append(iteration_number)
                loss_history.append(loss_contrastive)
    #show_plot(counter, loss_history)
    return net

def run_predict(net):
    folder_dataset_test = SiameseNetworkDataset(root=Config.testing_dir, transform=aug_transform)
    test_dataloader = DataLoader(folder_dataset_test, shuffle=True, num_workers=1,
                                 batch_size=Config.train_batch_size)
    dataiter = iter(test_dataloader)
    x0, _, _ = next(dataiter)
    _, x1, label2 = next(dataiter)
    output1, output2 = net(x0, x1)
    euclidean_distance = mx.ndarray.sqrt(mx.ndarray.square(output1 - output2))
    print('x0 vs x1 dissimilarity is {}'.format(euclidean_distance[0][0]))

if __name__ == '__main__':
    net = run_training()
    run_predict(net)
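As a quick sanity check of the ContrastiveLoss block above (a hedged sketch with made-up embeddings):

import mxnet as mx

loss = ContrastiveLoss(margin=2.0)
a = mx.nd.array([[1.0, 2.0]])
b = mx.nd.array([[1.5, 2.5]])
similar = mx.nd.array([[0.0]])      # label 0: same identity
dissimilar = mx.nd.array([[1.0]])   # label 1: different identity
print(loss(a, b, similar))          # small for nearby embeddings
print(loss(a, b, dissimilar))       # penalizes pairs closer than the margin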
PolyCollection doesn't work
I have a problem with matplotlib's PolyCollection when working with Python 2.5. In random mode, it shows me the following error: array dimensions must agree except for d_0 (file: collection.py - xy = np.concatenate([xy, np.zeros((1,2))])). This is my code:

from mpl_toolkits.mplot3d import Axes3D
from matplotlib.collections import PolyCollection
from matplotlib.colors import colorConverter
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.font_manager as fm
from matplotlib.patches import Rectangle
import matplotlib.cm as cm

colors = ['#be1e2d', '#666699', '#92d5ea', '#ee8310', '#8d10ee', '#5a3b16', '#26a4ed', '#f45a90', '#e9e744']
row_names = ['2005','2006','2007']
data = [[1,1,1,1,1,1],[2,2,2,2,2,2],[4,4,4,4,4,4],[5,5,5,5,5,5],[7,7,7,7,7,7],[8,8,8,8,8,8]]
column_names = ['Ri','Pe']

# add 0 at the start and end of each list
i = 0
for i in range(len(data)):
    data[i].append(0)
for i in range(len(data)):
    data[i].insert(0, 0)

dpi = 50.0
width = 460
height = 440
fig = plt.figure(1, figsize=(width/dpi, height/dpi), facecolor='w')
ax = fig.gca(projection='3d')  #, azim=40, elev=0)

# Build axes
size = len(row_names) * len(data[0])
zs = np.arange(len(data))

# Set the font properties
fp = fm.FontProperties()
fp.set_size('xx-small')

# Build graph
verts = []
step = 1.0/len(data[0])
vertsColor = []

# Check whether there is a single series or not
if len(column_names) > 1:
    idx = 0
    xs = np.arange(0, size, step)
    change_color = len(column_names) - 1
    for z in zs:
        verts.append(zip(xs, data[z]))
        vertsColor.append(colors[idx])
        if idx == change_color:
            idx = 0
        else:
            idx = idx + 1
    ################################################
    # I THINK THE PROBLEM IS HERE
    poly = PolyCollection(verts, facecolors=vertsColor)
    ax.add_collection3d(poly, zs=zs, zdir='y')
    ################################################
    ax.set_ylim3d(0, len(row_names)*len(column_names))
    zs = np.arange(0, len(row_names) * len(column_names), len(column_names))
    ax.set_yticks(zs)
    lim = ((size*step)-step) - (len(row_names) - 1)
    ax.set_xlim3d(0, lim)
    rect = []
    serie = []
    # Build legend
    for i in range(len(column_names)):
        rect.insert(i, Rectangle((0, 0), 1, 1, facecolor=colors[i]))
        serie.insert(i, column_names[i])
    ax.legend((rect), (serie), loc=3, ncol=3, prop=fp)
else:
    xs = np.arange(0, size, step)
    for z in zs:
        verts.append(zip(xs, data[z]))
    poly = PolyCollection(verts, facecolors=colors)  #[:len(data)])
    poly.set_alpha(0.6)
    ax.add_collection3d(poly, zs=zs, zdir='y')
    ax.set_xlabel('Rec')
    lim = ((size*step)-step) - (len(row_names) - 1)
    ax.set_xlim3d(0, lim)
    ax.set_yticks(zs)
    ax.set_ylim3d(0, len(row_names))

# Find max value
max_value = 0
i = 0
for i in data:
    mass = max(i)
    if mass > max_value:
        max_value = mass

# Font for the X, Y, Z labels
for label in ax.get_xticklabels():
    label.set_fontproperties(fp)
for label in ax.get_yticklabels():
    label.set_fontproperties(fp)
for label in ax.get_zticklabels():
    label.set_fontproperties(fp)

ax.set_xticklabels('')
ax.set_ylabel('Years')
ax.set_yticklabels(row_names, fontproperties=fp)
ax.set_zlabel('Values')
ax.set_zlim3d(0, max_value)
ax.set_title('Test', x=0.5, y=1)
plt.show()

Thanks.
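For comparison, a minimal 3D PolyCollection that renders correctly (an editor's hedged sketch against a modern matplotlib, independent of the Python 2.5 setup above):

from matplotlib.collections import PolyCollection
import matplotlib.pyplot as plt
import numpy as np

fig = plt.figure()
ax = fig.add_subplot(projection='3d')
xs = np.arange(8)
# each polygon must be a concrete sequence of (x, y) pairs; list(zip(...))
# also guards against zip returning a lazy iterator on Python 3
verts = [list(zip(xs, np.random.rand(len(xs)))) for _ in range(3)]
poly = PolyCollection(verts, facecolors=['#be1e2d', '#666699', '#92d5ea'])
poly.set_alpha(0.6)
ax.add_collection3d(poly, zs=[0, 1, 2], zdir='y')
ax.set_xlim3d(0, 7)
ax.set_ylim3d(-1, 3)
ax.set_zlim3d(0, 1)
plt.show()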