Unable to detect multiple faces at a time - tensorflow
For some reason, I'm not able to detect multiple faces at a time; only one face is detected at any given moment. How do I resolve this issue? I've added the code below. I've used Google's FaceNet for real-time face recognition.
In the video output it draws a bounding box on only one face at a time, but the console output correctly counts that two or more faces are present.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from scipy import misc
import cv2
import matplotlib.pyplot as plt
import numpy as np
import argparse
import facenet
import detect_face
import os
from os.path import join as pjoin
import sys
import time
import copy
import math
import pickle
from sklearn.svm import SVC
from sklearn.externals import joblib
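# note (editor): sklearn.externals.joblib has been removed in recent scikit-learn releases;
# on a modern install, use "import joblib" directly instead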
# added
#import reload
#reload(sys)
#sys.setdefaultencoding('utf8')
print('Creating networks and loading parameters')
with tf.Graph().as_default():
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
log_device_placement=False))
with sess.as_default():
pnet, rnet, onet = detect_face.create_mtcnn(sess, './') #face detection
minsize = 20 # minimum face size; minsize, threshold and factor are used for detection
threshold = [0.6, 0.7, 0.7] # threshold for each of the three MTCNN stages
factor = 0.709 # scale factor between image pyramid levels
margin = 44
frame_interval = 3
batch_size = 1000
image_size = 182
input_image_size = 160
items = os.listdir("/Aryabhatta Robotics Internship/facenet-master/Real_time_face/ids/aligned")
#HumanNames = []
#for names in items:
#HumanNames.append(names)
#print(HumanNames)
#HumanNames = ['Alok','Siddhant','tesra','s01','s02','s03','s04','s05','s06','s07','s08','s09','s10','s11','s12','s13','s14','s15','s16','s17','s18','s19','s20'] #train human name, known face names
print('Loading feature extraction model')
modeldir = '/Aryabhatta Robotics Internship/facenet-master/Real_time_face/models/20180402-114759/20180402-114759.pb' # feature extraction model
facenet.load_model(modeldir)
images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
embedding_size = embeddings.get_shape()[1]
classifier_filename = '/Aryabhatta Robotics Internship/facenet-master/Real_time_face/models/my_classifier/my_classifier.pkl' # our own classifier
classifier_filename_exp = os.path.expanduser(classifier_filename)
with open(classifier_filename_exp, 'rb') as infile:
(model, class_names) = pickle.load(infile)#, encoding='latin1')
print('load classifier file-> %s' % classifier_filename_exp)
video_capture = cv2.VideoCapture(0)
c = 0
# #video writer
# fourcc = cv2.VideoWriter_fourcc(*'DIVX')
# out = cv2.VideoWriter('3F_0726.avi', fourcc, fps=30, frameSize=(640,480))
print('Start Recognition!')
prevTime = 0
while True: #infinite loop
ret, frame = video_capture.read() #video capture from webcam
frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5) #resize frame (optional)
curTime = time.time() # calc fps
timeF = frame_interval
if (c % timeF == 0):
find_results = []
if frame.ndim == 2:
frame = facenet.to_rgb(frame)
frame = frame[:, :, 0:3]
bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor)
nrof_faces = bounding_boxes.shape[0]
print('Detected_FaceNum: %d' % nrof_faces)
if nrof_faces > 0:
det = bounding_boxes[:, 0:4]
img_size = np.asarray(frame.shape)[0:2]
cropped = []
scaled = []
scaled_reshape = []
bb = np.zeros((nrof_faces,4), dtype=np.int32)
for i in range(nrof_faces):
print("faceno:" + str(i))
emb_array = np.zeros((1, embedding_size))
bb[i][0] = det[i][0]
bb[i][1] = det[i][1]
bb[i][2] = det[i][2]
bb[i][3] = det[i][3]
# inner exception
if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
print('face is out of range!')
continue
cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
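# NOTE (editor): the lines below always index element 0 (cropped[0], scaled[0], scaled_reshape[0])
# instead of element i, and the inner prediction/drawing loops reuse the loop variable i, so each
# pass re-embeds the first face and redraws its box; this is why only one face ever gets a bounding box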
cropped[0] = facenet.flip(cropped[0], False)
scaled.append(misc.imresize(cropped[0], (image_size, image_size), interp='bilinear'))
scaled[0] = cv2.resize(scaled[0], (input_image_size,input_image_size),
interpolation=cv2.INTER_CUBIC)
scaled[0] = facenet.prewhiten(scaled[0])
scaled_reshape.append(scaled[0].reshape(-1,input_image_size,input_image_size,3))
feed_dict = {images_placeholder: scaled_reshape[0], phase_train_placeholder: False}
emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
#print(emb_array)
threshold_accuracy = 155
predictions = model.predict_proba(emb_array)
#print(predictions)
for i in range(len(predictions[0])):
predictions[0][i] = np.exp(18*predictions[0][i])
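# NOTE (editor): this exponential rescaling pushes the scores far above 1, which is why
# threshold_accuracy is set to 155 here instead of a probability in [0, 1]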
#print(predictions)
best_class_indices = np.argmax(predictions, axis=1)
print(best_class_indices)
print("next")
best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
print(best_class_probabilities)
for i in range(len(best_class_indices)):
print('%4d %s: %.3f' % (i, class_names[best_class_indices[i]], best_class_probabilities[i]))
cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2) #boxing face
#plot result idx under box
text_x = bb[i][0]
text_y = bb[i][3] + 20
# print('result: ', best_class_indices[0])
if best_class_probabilities[i] > threshold_accuracy :
#result_names = HumanNames[best_class_indices[0]]
cv2.putText(frame, class_names[best_class_indices[i]], (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
1, (0, 0, 255), thickness=1, lineType=2)
else:
cv2.putText(frame, 'Unknown', (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
1, (0, 0, 255), thickness=1, lineType=2)
#for H_i in HumanNames:
#if HumanNames[best_class_indices[0]] == H_i and best_class_probabilities[0] > threshold_accuracy :
#flag = 1
#result_names = HumanNames[best_class_indices[0]]
#cv2.putText(frame, result_names, (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
#1, (0, 0, 255), thickness=1, lineType=2)
#else:
#cv2.putText(frame, 'Unknown', (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
# 1, (0, 0, 255), thickness=1, lineType=2)
else:
print('Unable to align')
sec = curTime - prevTime
prevTime = curTime
fps = 1 / (sec)
str1 = 'FPS: %2.3f' % fps
text_fps_x = len(frame[0]) - 150
text_fps_y = 20
cv2.putText(frame, str1, (text_fps_x, text_fps_y),
cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 0), thickness=1, lineType=2)
# c+=1
cv2.imshow('Video', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
video_capture.release()
# #video writer
# out.release()
cv2.destroyAllWindows()
Threshold accuracy should be between 0 and 1, since model.predict_proba returns probabilities; make sure your threshold accuracy is set to something like 0.60 or higher rather than 155. Also note that the working example below indexes the per-face buffers with i (cropped[i], scaled[i], scaled_reshape[i]) instead of always using element 0, which is what lets it process and box every detected face.
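For instance, a minimal sketch of the per-face check, reusing the question's variable names (the 0.60 value is illustrative):

threshold_accuracy = 0.60  # predict_proba outputs lie in [0, 1], so compare on that scale
if best_class_probabilities[i] > threshold_accuracy:
    result_names = class_names[best_class_indices[i]]  # confident match
else:
    result_names = 'Unknown'  # score below threshold

The full runnable example: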
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from scipy import misc
from skimage.transform import resize
import cv2
import numpy as np
import facenet
import detect_face
import os
import time
import pickle
import sys
img_path='download.jpeg'
modeldir = './model/20170511-185253.pb'
classifier_filename = './class/classifier.pkl'
npy='./npy'
train_img="./train_img"
with tf.Graph().as_default():
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
with sess.as_default():
pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)
minsize = 10 # minimum size of face
threshold = [0.6, 0.7, 0.7]  # threshold for each of the three MTCNN stages
factor = 1 # scale factor
margin = 44
frame_interval = 3
batch_size = 1000
image_size = 182
input_image_size = 160
HumanNames = os.listdir(train_img)
HumanNames.sort()
print('Loading feature extraction model')
facenet.load_model(modeldir)
images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
embedding_size = embeddings.get_shape()[1]
classifier_filename_exp = os.path.expanduser(classifier_filename)
with open(classifier_filename_exp, 'rb') as infile:
(model, class_names) = pickle.load(infile)
# video_capture = cv2.VideoCapture("akshay_mov.mp4")
c = 0
print('Start Recognition!')
prevTime = 0
# ret, frame = video_capture.read()
frame = cv2.imread(img_path,0)
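# flag 0 makes imread return a single-channel grayscale image; the frame.ndim == 2 branch
# below converts it back to RGB before running detection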
frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5) #resize frame (optional)
curTime = time.time()+1 # calc fps
timeF = frame_interval
if (c % timeF == 0):
find_results = []
if frame.ndim == 2:
frame = facenet.to_rgb(frame)
frame = frame[:, :, 0:3]
print(1)
bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor)
nrof_faces = bounding_boxes.shape[0]
print('Face Detected: %d' % nrof_faces)
if nrof_faces > 0:
det = bounding_boxes[:, 0:4]
img_size = np.asarray(frame.shape)[0:2]
cropped = []
scaled = []
scaled_reshape = []
bb = np.zeros((nrof_faces,4), dtype=np.int32)
for i in range(nrof_faces):
emb_array = np.zeros((1, embedding_size))
bb[i][0] = det[i][0]
bb[i][1] = det[i][1]
bb[i][2] = det[i][2]
bb[i][3] = det[i][3]
# inner exception
if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
print('face is too close')
continue
cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
cropped[i] = facenet.flip(cropped[i], False)
scaled.append(resize(cropped[i], (image_size, image_size), anti_aliasing=True))
scaled[i] = cv2.resize(scaled[i], (input_image_size,input_image_size),
interpolation=cv2.INTER_CUBIC)
scaled[i] = facenet.prewhiten(scaled[i])
scaled_reshape.append(scaled[i].reshape(-1,input_image_size,input_image_size,3))
feed_dict = {images_placeholder: scaled_reshape[i], phase_train_placeholder: False}
emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
predictions = model.predict_proba(emb_array)
print(predictions)
best_class_indices = np.argmax(predictions, axis=1)
# print(best_class_indices)
best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
print(best_class_probabilities)
cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2) #boxing face
#plot result idx under box
text_x = bb[i][0]
text_y = bb[i][3] + 20
print('Result Indices: ', best_class_indices[0])
print(HumanNames)
for H_i in HumanNames:
# print(H_i)
if HumanNames[best_class_indices[0]] == H_i:
result_names = HumanNames[best_class_indices[0]]
else:
print('Unable to align')
cv2.imshow('Image', frame)
if cv2.waitKey(100) & 0xFF == ord('q'):
sys.exit("Thanks")
cv2.destroyAllWindows()
Related
RuntimeError: The size of tensor a (49) must match the size of tensor b (64) at non-singleton dimension 1
I have been working on attention maps for Swin Transformers. Below is my implementation:

import torch
from PIL import Image
import numpy
import sys
from torchvision import transforms
import numpy as np
import cv2

def rollout(attentions, discard_ratio, head_fusion):
    result = torch.eye(attentions[0].size(-1))
    with torch.no_grad():
        for attention in attentions:
            # print(attentions)
            if head_fusion == "mean":
                attention_heads_fused = attention.mean(axis=1)
            elif head_fusion == "max":
                attention_heads_fused = attention.max(axis=1)[0]
            elif head_fusion == "min":
                attention_heads_fused = attention.min(axis=1)[0]
            else:
                raise "Attention head fusion type Not supported"
            # Drop the lowest attentions, but
            # don't drop the class token
            flat = attention_heads_fused.view(attention_heads_fused.size(0), -1)
            # print(flat)
            _, indices = flat.topk(int(flat.size(-1)*discard_ratio), -1, False)
            # print("_ : ",_," indices : ",indices)
            indices = indices[indices != 0]
            flat[0, indices] = 0
            I = torch.eye(attention_heads_fused.size(-1))
            # print("I : ",I)
            a = (attention_heads_fused + 1.0*I)/2
            # print("a : ",a)
            # print(a.size())
            print(a.sum(dim=-1))
            a = a / a.sum(dim=-1)
            result = torch.matmul(a, result)
            # print("result : ",result)
    # Look at the total attention between the class token,
    # and the image patches
    mask = result[0, 0, 1:]
    # In case of 224x224 image, this brings us from 196 to 14
    width = int(mask.size(-1)**0.5)
    mask = mask.reshape(width, width).numpy()
    mask = mask / np.max(mask)
    return mask

class VITAttentionRollout:
    def __init__(self, model, attention_layer_name='dropout', head_fusion="mean", discard_ratio=0.9):
        self.model = model
        self.head_fusion = head_fusion
        self.discard_ratio = discard_ratio
        # print(self.model.named_modules())
        for name, module in self.model.named_modules():
            # print("Name : ",name," Module : ",module)
            if attention_layer_name in name:
                module.register_forward_hook(self.get_attention)
        # print(self.attentions)
        self.attentions = []

    def get_attention(self, module, input, output):
        self.attentions.append(output.cpu())

    def __call__(self, input_tensor):
        self.attentions = []
        with torch.no_grad():
            output = self.model(**input_tensor)
            # print(output)
        return rollout(self.attentions, self.discard_ratio, self.head_fusion)

This is the main program:

import sys
import torch
from PIL import Image
from torchvision import transforms
import numpy as np
import cv2
from google.colab.patches import cv2_imshow
# from vit_rollout import VITAttentionRollout
from vit_grad_rollout import VITAttentionGradRollout

def show_mask_on_image(img, mask):
    img = np.float32(img) / 255
    heatmap = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET)
    heatmap = np.float32(heatmap) / 255
    cam = heatmap + np.float32(img)
    cam = cam / np.max(cam)
    return np.uint8(255 * cam)

if __name__ == '__main__':
    model.eval()
    image_path = '/content/both.jpg'
    category_index = None
    head_fusion = 'max'
    discard_ratio = 0.9
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])
    img = Image.open(image_path)
    img = img.resize((224, 224))
    input_tensor = feature_extractor(img, return_tensors="pt")
    # print(input_tensor)
    if category_index is None:
        print("Doing Attention Rollout")
        attention_rollout = VITAttentionRollout(model, head_fusion=head_fusion, discard_ratio=discard_ratio)
        mask = attention_rollout(input_tensor)
        name = "attention_rollout_{:.3f}_{}.png".format(discard_ratio, head_fusion)
    else:
        print("Doing Gradient Attention Rollout")
        grad_rollout = VITAttentionGradRollout(model, discard_ratio=discard_ratio)
        mask = grad_rollout(input_tensor, category_index)
        name = "grad_rollout_{}_{:.3f}_{}.png".format(category_index, discard_ratio, head_fusion)

    np_img = np.array(img)[:, :, ::-1]
    mask = cv2.resize(mask, (np_img.shape[1], np_img.shape[0]))
    mask = show_mask_on_image(np_img, mask)
    cv2_imshow(np_img)
    cv2_imshow(mask)
    cv2.imwrite("input.jpg", np_img)
    cv2.imwrite(name, mask)
    cv2.waitKey(-1)

I am referring to the git project https://github.com/jacobgil/vit-explain, but I am getting the error:

RuntimeError: The size of tensor a (49) must match the size of tensor b (64) at non-singleton dimension 1

I researched some git projects, but there is very little information on Swin Transformers. Is there any way I can make an attention map for Swin Transformer models? Thanks in advance.
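One plausible source of the shape mismatch (a hedged sketch, not a confirmed diagnosis): Swin computes attention inside 7x7 windows, so each attention map covers 49 tokens and there is no [CLS] token, while the rollout code above assumes ViT-style global attention with a class token at index 0 and a square patch grid. A toy illustration of how those assumptions break:

import torch

attn = torch.rand(1, 49, 49)         # one Swin window: 7*7 = 49 tokens, no [CLS] token
mask = attn[0, 0, 1:]                # dropping a "class token" that does not exist leaves 48 values
width = int(mask.size(-1) ** 0.5)    # int(sqrt(48)) == 6
print(width * width, mask.size(-1))  # 36 != 48, so mask.reshape(width, width) would fail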
Using photoshop to complete undersampling in tensorflow object detection?
I'm currently training an object detection model using TensorFlow and I ran into a problem: I don't have enough samples to train my model effectively, and it will take me a long time to get more. Would it be a good idea to create the remaining samples using Photoshop, or will I run into issues with that approach?
You have many options:

imgaug
albumentations
Augmentor
OpenCV: Image-Augmentation-Using-OpenCV-and-Python-Github-Repo

Example code I have used before:

import numpy as np
import cv2 as cv
import imutils

def data_augmentation(img, min_rot_angle=-180, max_rot_angle=180, crop_ratio=0.2,
                      smooth_size=3, sharp_val=3, max_noise_scale=10):
    (H, W) = img.shape[:2]
    img_a = img
    all_func = ['flip', 'rotate', 'crop', 'smooth', 'sharp', 'noise']
    do_func = np.random.choice(all_func, size=np.random.randint(1, len(all_func)), replace=False)
    #do_func = ['crop']
    # Flip image, 0: vertically, 1: horizontally
    if 'flip' in do_func:
        img_a = cv.flip(img_a, np.random.choice([0, 1]))
    # Rotate image
    if 'rotate' in do_func:
        rot_ang = np.random.uniform(min_rot_angle, max_rot_angle)
        img_a = imutils.rotate_bound(img_a, rot_ang)
    # Crop image
    if 'crop' in do_func:
        (H_A, W_A) = img_a.shape[:2]
        start_x = np.random.randint(0, int(H_A * crop_ratio))
        start_y = np.random.randint(0, int(W_A * crop_ratio))
        end_x = np.random.randint(int(H_A * (1 - crop_ratio)), H_A)
        end_y = np.random.randint(int(W_A * (1 - crop_ratio)), W_A)
        img_a = img_a[start_x:end_x, start_y:end_y]
    # Smoothing
    if 'smooth' in do_func:
        img_a = cv.GaussianBlur(img_a, (smooth_size, smooth_size), 0)
    # Sharpening
    if 'sharp' in do_func:
        de_sharp_val = -(sharp_val - 1) / 8
        kernel = np.array([[de_sharp_val] * 3,
                           [de_sharp_val, sharp_val, de_sharp_val],
                           [de_sharp_val] * 3])
        img_a = cv.filter2D(img_a, -1, kernel)
    # Add Gaussian noise to the image
    if 'noise' in do_func:
        noise_scale = np.random.uniform(0, max_noise_scale)
        gauss = np.random.normal(0, noise_scale, img_a.size)
        gauss = np.float32(gauss.reshape(img_a.shape[0], img_a.shape[1], img_a.shape[2]))
        img_a = cv.add(img_a, gauss)
    # Keep shape
    img_a = cv.resize(img_a, (W, H))
    return np.float32(img_a)

Others: you can do data augmentation with just TensorFlow! More in this blog: Data Augmentation in Python: Everything You Need to Know
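If you want the pure-TensorFlow route, here is a minimal sketch using standard tf.image ops (assuming TF 2.x, which is not what the rest of this page uses):

import tensorflow as tf

def tf_augment(image):
    # random geometric and photometric jitter with built-in tf.image ops
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_brightness(image, max_delta=0.2)
    image = tf.image.random_contrast(image, lower=0.8, upper=1.2)
    return image

# typically applied on the fly inside a tf.data input pipeline:
# dataset = dataset.map(lambda img, label: (tf_augment(img), label))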
why does tf.estimator.DNNRegressor predict negative y value?
It is strange that the predict() function in tf.estimator.DNNRegressor predicts negative y values even though the training dataset contains no negative y values. I found this when I reduced y by a factor of 1000: if y was 12000 before, it is now 12. The range of y is now [3, 400], but after this change the predict() function outputs some negative values. I didn't set the activation function in tf.estimator.DNNRegressor, so the default activation function is relu, whose range is [0, max). So why does it predict negative values? Is this a bug in tf.estimator.DNNRegressor, or is no activation function applied to y? Thank you. The code is:

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import itertools
import pandas as pd
import tensorflow as tf
from sklearn import datasets, metrics
import csv

tf.logging.set_verbosity(tf.logging.INFO)

COLUMNS = ["col1","col2","col3","col4","col5","col6","col7","col8","col9","col10","col11","col12","col13","col14","col15","col16","col17","col18","col19","col20","col21","col22","col23","col24","col25","col26","col27","col28","col29","col30","col31","col32","col33","col34","col35","col36","col37","col38","col39","col40","col41","col42","col43","col44","col45","col46","col47","col48","col49","col50","col51","col52","col53","col54","col55","col56","col57","col58","col59","col60","col61","col62","col63","col64","col65","col66","col67","col68","col69","col70","col71","col72","col73","col74","col75","col76","col77","col78","col79","col80","col81","col82","col83","col84","col85","col86","col87","col88","col89","col90","col91","col92","col93","col94","col95","col96","col97","col98","col99","col100","col101","col102","col103","col104","col105","col106","col107","col108","col109","col110","col111","col112","col113","col114","col115","col116","col117","col118","col119","col120","col121","col122","col123","col124","col125","col126","col127","col128","col129","col130","col131","col132","col133","col134","col135","col136","col137","col138","col139","col140","col141","col142","col143","col144","col145","col146","col147","col148","col149","col150","col151","col152","col153","col154","col155","col156","col157","col158","col159","col160","col161","col162","col163","col164","col165","col166","col167","col168","col169","col170","col171","col172","col173","col174","col175","col176","col177","col178","col179","col180","col181","col182","col183","col184","col185","col186","col187","col188","col189","col190","col191","col192","col193","col194","col195","col196","col197","col198","col199","col200","col201","col202","col203","col204","col205","col206","col207","col208","col209","col210","col211","col212","col213","col214"]
FEATURES = ["col1","col2","col3","col4","col5","col6","col7","col8","col9","col10","col11","col12","col13","col14","col15","col16","col17","col18","col19","col20","col21","col22","col23","col24","col25","col26","col27","col28","col29","col30","col31","col32","col33","col34","col35","col36","col37","col38","col39","col40","col41","col42","col43","col44","col45","col46","col47","col48","col49","col50","col51","col52","col53","col54","col55","col56","col57","col58","col59","col60","col61","col62","col63","col64","col65","col66","col67","col68","col69","col70","col71","col72","col73","col74","col75","col76","col77","col78","col79","col80","col81","col82","col83","col84","col85","col86","col87","col88","col89","col90","col91","col92","col93","col94","col95","col96","col97","col98","col99","col100","col101","col102","col103","col104","col105","col106","col107","col108","col109","col110","col111","col112","col113","col114","col115","col116","col117","col118","col119","col120","col121","col122","col123","col124","col125","col126","col127","col128","col129","col130","col131","col132","col133","col134","col135","col136","col137","col138","col139","col140","col141","col142","col143","col144","col145","col146","col147","col148","col149","col150","col151","col152","col153","col154","col155","col156","col157","col158","col159","col160","col161","col162","col163","col164","col165","col166","col167","col168","col169","col170","col171","col172","col173","col174","col175","col176","col177","col178","col179","col180","col181","col182","col183","col184","col185","col186","col187","col188","col189","col190","col191","col192","col193","col194","col195","col196","col197","col198","col199","col200","col201","col202","col203","col204","col205","col206","col207","col208","col209","col211","col212","col213"]
LABEL = "col214"

def get_input_fn(data_set, num_epochs=None, shuffle=True):
    return tf.estimator.inputs.pandas_input_fn(
        x=pd.DataFrame({k: data_set[k].values for k in FEATURES}),
        y=pd.Series(data_set[LABEL].values),
        num_epochs=num_epochs,
        shuffle=shuffle)

def get_mae(y_pre, y_target):
    absError = []
    for i in range(len(y_pre)):
        absError.append(abs(y_pre[i] - y_target[i]))
    return sum(absError) / len(absError)

def get_mse(y_pre, y_target):
    squaredError = []
    for i in range(len(y_pre)):
        val = y_pre[i] - y_target[i]
        squaredError.append(val * val)
    return sum(squaredError) / len(squaredError)

training_set = pd.read_csv("train.csv", skipinitialspace=True, skiprows=1, names=COLUMNS)
test_set = pd.read_csv("test.csv", skipinitialspace=True, skiprows=1, names=COLUMNS)
predict_set = pd.read_csv("predict.csv", skipinitialspace=True, skiprows=1, names=COLUMNS)

feature_cols = [tf.feature_column.numeric_column(k) for k in FEATURES]
regressor = tf.estimator.DNNRegressor(feature_columns=feature_cols,
                                      hidden_units=[250, 200, 100, 50],
                                      model_dir="./model")
regressor.train(input_fn=get_input_fn(training_set), steps=8000)
ev = regressor.evaluate(input_fn=get_input_fn(test_set, num_epochs=1, shuffle=False))
loss_score = ev["loss"]
print("Loss: {0:f}".format(loss_score))

predict = regressor.predict(input_fn=get_input_fn(predict_set, num_epochs=1, shuffle=False))
y_predict = predict_set[LABEL].values.tolist()
print(type(y_predict))
print(y_predict)
list_predict = list(predict)
print(type(list_predict))
y_predicted = []
for i in range(len(list_predict)):
    y_predicted.append(list_predict[i]['predictions'][0])
print(y_predicted)

fileObject = open('time_prediction.txt', 'w')
for time in y_predicted:
    fileObject.write(str(time))
    fileObject.write('\n')
fileObject.close()

mae = get_mae(y_predict, y_predicted)
mse = get_mse(y_predict, y_predicted)
print("Mean Absolute Error:" + str(mae) + " Mean Squared Error:" + str(mse))
#mae = tf.metrics.mean_absolute_error(y_predict, list_predict)
#print(mea)

These are 3 data records from the dataset:

2399.998,4,100,100,0,0,1,10,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22,0,0,2,44,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,3,3,1,0,0,0,5,0,0,0,0,2,0,0,1,4,13,4,0,11,14,15,10,8,0,0,3,1,0,0,0,0,0,0,0,0,0,0,1,364,123428,1397595,16772133,56,103,16772153,22,22,11
1919.9984,2,30,30,0,0,1,10,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,38,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,3,3,1,0,0,0,5,0,0,0,0,2,0,0,0,0,12,2,0,9,14,10,9,2,0,0,2,1,0,0,0,0,0,0,0,0,0,0,1,17525535,34347257,1397595,5590711,16698805,103,5913257,896853,1190468,25
479.9996,2,60,60,0,0,1,10,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,38,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,3,3,1,0,0,0,5,0,0,0,0,2,0,0,0,0,12,2,0,9,14,10,9,2,0,0,2,1,0,0,0,0,0,0,0,0,0,0,1,17525535,34347257,1397595,5590711,16698805,103,5913257,896853,1190468,168

The last column is y.
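A hedged note on the behaviour described above: in the canned DNNRegressor, relu applies only to the hidden layers, while the regression head is a plain linear layer, so the output is an unbounded real number and can go negative regardless of the label range. A minimal sketch of the equivalent forward pass (TF 1.x layers API, illustrative only, not the estimator's actual source):

import tensorflow as tf

def dnn_regressor_forward(features, hidden_units=(250, 200, 100, 50)):
    net = features
    for units in hidden_units:
        net = tf.layers.dense(net, units, activation=tf.nn.relu)  # hidden layers use relu
    # regression head: no activation, so predictions are unbounded and may be negative
    return tf.layers.dense(net, 1, activation=None)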
Implementing contrastive loss function in mxnet
I want to train a siamese network using depth images obtained from a Kinect. I want to use a contrastive loss function to train this network, but I can't find a contrastive loss function in mxnet. My implementation is as follows:

def LossFunc(distance, label, margin):
    distance = distance.reshape(label.shape)
    dis_positive = distance * label
    dis_negative = margin - distance
    zeros = nd.zeros(label.shape, ctx=ctx)
    dis_negative = nd.concat(dis_negative, zeros, dim=1)
    dis_negative = nd.max(dis_negative, axis=1).reshape(label.shape)
    dis_negative = (1-label) * dis_negative
    return 0.5 * dis_positive**2 + 0.5 * dis_negative**2

Is it right?
Here is an implementation of the contrastive loss using the Gluon API:

class ContrastiveLoss(Loss):
    def __init__(self, margin=2.0, weight=None, batch_axis=0, **kwargs):
        super(ContrastiveLoss, self).__init__(weight, batch_axis, **kwargs)
        self.margin = margin

    def hybrid_forward(self, F, output1, output2, label):
        euclidean_distance = F.sqrt(F.square(output1 - output2))
        loss_contrastive = F.mean((1 - label) * F.square(euclidean_distance) +
                                  label * F.square(F.clip(self.margin - euclidean_distance, 0.0, 10)))
        return loss_contrastive

I implemented it based on a PyTorch example of how to use a siamese net, taken from here. There are quite a few differences between PyTorch and MxNet, so if you want to try this out, here is the full runnable example. You would need to download the AT&T faces data, though, and convert the images to jpeg, as mxnet doesn't support loading .pgm images out of the box.

import matplotlib.pyplot as plt
import numpy as np
import random
from PIL import Image
import PIL.ImageOps
import mxnet as mx
from mxnet import autograd
from mxnet.base import numeric_types
from mxnet.gluon import nn, HybridBlock, Trainer
from mxnet.gluon.data import DataLoader
from mxnet.gluon.data.vision.datasets import ImageFolderDataset
from mxnet.gluon.loss import Loss

def imshow(img, text=None, should_save=False):
    npimg = img.numpy()
    plt.axis("off")
    if text:
        plt.text(75, 8, text, style='italic', fontweight='bold',
                 bbox={'facecolor': 'white', 'alpha': 0.8, 'pad': 10})
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

def show_plot(iteration, loss):
    plt.plot(iteration, loss)
    plt.show()

class Config:
    training_dir = "./faces/training/"
    testing_dir = "./faces/testing/"
    train_batch_size = 5
    train_number_epochs = 100

class SiameseNetworkDataset(ImageFolderDataset):
    def __init__(self, root, transform=None):
        super().__init__(root, flag=0, transform=transform)
        self.root = root
        self.transform = transform

    def __getitem__(self, index):
        items_with_index = list(enumerate(self.items))
        img0_index, img0_tuple = random.choice(items_with_index)
        # we need to make sure approx 50% of images are in the same class
        should_get_same_class = random.randint(0, 1)
        if should_get_same_class:
            while True:
                # keep looping till the same class image is found
                img1_index, img1_tuple = random.choice(items_with_index)
                if img0_tuple[1] == img1_tuple[1]:
                    break
        else:
            img1_index, img1_tuple = random.choice(items_with_index)
        img0 = super().__getitem__(img0_index)
        img1 = super().__getitem__(img1_index)
        return img0[0].transpose(), img1[0].transpose(), mx.nd.array(mx.nd.array([int(img1_tuple[1] != img0_tuple[1])]))

    def __len__(self):
        return super().__len__()

class ReflectionPad2D(HybridBlock):
    """Pads the input tensor using the reflection of the input boundary.

    Parameters
    ----------
    padding: int
        An integer padding size

    Shape:
        - Input: :math:`(N, C, H_{in}, W_{in})`
        - Output: :math:`(N, C, H_{out}, W_{out})` where
          :math:`H_{out} = H_{in} + 2 * padding`
          :math:`W_{out} = W_{in} + 2 * padding`
    """
    def __init__(self, padding=0, **kwargs):
        super(ReflectionPad2D, self).__init__(**kwargs)
        if isinstance(padding, numeric_types):
            padding = (0, 0, 0, 0, padding, padding, padding, padding)
        assert(len(padding) == 8)
        self._padding = padding

    def hybrid_forward(self, F, x, *args, **kwargs):
        return F.pad(x, mode='reflect', pad_width=self._padding)

class SiameseNetwork(HybridBlock):
    def __init__(self):
        super(SiameseNetwork, self).__init__()
        self.cnn1 = nn.HybridSequential()
        with self.cnn1.name_scope():
            self.cnn1.add(ReflectionPad2D(padding=1))
            self.cnn1.add(nn.Conv2D(in_channels=1, channels=4, kernel_size=3))
            self.cnn1.add(nn.Activation('relu'))
            self.cnn1.add(nn.BatchNorm())
            self.cnn1.add(ReflectionPad2D(padding=1))
            self.cnn1.add(nn.Conv2D(in_channels=4, channels=8, kernel_size=3))
            self.cnn1.add(nn.Activation('relu'))
            self.cnn1.add(nn.BatchNorm())
            self.cnn1.add(ReflectionPad2D(padding=1))
            self.cnn1.add(nn.Conv2D(in_channels=8, channels=8, kernel_size=3))
            self.cnn1.add(nn.Activation('relu'))
            self.cnn1.add(nn.BatchNorm())
        self.fc1 = nn.HybridSequential()
        with self.fc1.name_scope():
            self.cnn1.add(nn.Dense(500)),
            self.cnn1.add(nn.Activation('relu')),
            self.cnn1.add(nn.Dense(500)),
            self.cnn1.add(nn.Activation('relu')),
            self.cnn1.add(nn.Dense(5))

    def hybrid_forward(self, F, input1, input2):
        output1 = self._forward_once(input1)
        output2 = self._forward_once(input2)
        return output1, output2

    def _forward_once(self, x):
        output = self.cnn1(x)
        #output = output.reshape((output.shape[0],))
        output = self.fc1(output)
        return output

class ContrastiveLoss(Loss):
    def __init__(self, margin=2.0, weight=None, batch_axis=0, **kwargs):
        super(ContrastiveLoss, self).__init__(weight, batch_axis, **kwargs)
        self.margin = margin

    def hybrid_forward(self, F, output1, output2, label):
        euclidean_distance = F.sqrt(F.square(output1 - output2))
        loss_contrastive = F.mean((1 - label) * F.square(euclidean_distance) +
                                  label * F.square(F.clip(self.margin - euclidean_distance, 0.0, 10)))
        return loss_contrastive

def aug_transform(data, label):
    augs = mx.image.CreateAugmenter(data_shape=(1, 100, 100))
    for aug in augs:
        data = aug(data)
    return data, label

def run_training():
    siamese_dataset = SiameseNetworkDataset(root=Config.training_dir, transform=aug_transform)
    train_dataloader = DataLoader(siamese_dataset, shuffle=True, num_workers=1,
                                  batch_size=Config.train_batch_size)
    counter = []
    loss_history = []
    iteration_number = 0
    net = SiameseNetwork()
    net.initialize(init=mx.init.Xavier())
    trainer = Trainer(net.collect_params(), 'adam', {'learning_rate': 0.0005})
    loss = ContrastiveLoss(margin=2.0)
    for epoch in range(0, Config.train_number_epochs):
        for i, data in enumerate(train_dataloader, 0):
            img0, img1, label = data
            with autograd.record():
                output1, output2 = net(img0, img1)
                loss_contrastive = loss(output1, output2, label)
            loss_contrastive.backward()
            trainer.step(Config.train_batch_size)
            if i % 10 == 0:
                print("Epoch number {}\n Current loss {}\n".format(epoch, loss_contrastive))
                iteration_number += 10
                counter.append(iteration_number)
                loss_history.append(loss_contrastive)
    #show_plot(counter, loss_history)
    return net

def run_predict(net):
    folder_dataset_test = SiameseNetworkDataset(root=Config.testing_dir, transform=aug_transform)
    test_dataloader = DataLoader(folder_dataset_test, shuffle=True, num_workers=1,
                                 batch_size=Config.train_batch_size)
    dataiter = iter(test_dataloader)
    x0, _, _ = next(dataiter)
    _, x1, label2 = next(dataiter)
    output1, output2 = net(x0, x1)
    euclidean_distance = mx.ndarray.sqrt(mx.ndarray.square(output1 - output2))
    print('x0 vs x1 dissimilarity is {}'.format(euclidean_distance[0][0]))

if __name__ == '__main__':
    net = run_training()
    run_predict(net)
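As a quick sanity check of the ContrastiveLoss block above (a hedged sketch with made-up embeddings):

import mxnet as mx

loss = ContrastiveLoss(margin=2.0)
a = mx.nd.array([[1.0, 2.0]])
b = mx.nd.array([[1.5, 2.5]])
similar = mx.nd.array([[0.0]])      # label 0: same identity
dissimilar = mx.nd.array([[1.0]])   # label 1: different identity
print(loss(a, b, similar))          # small for nearby embeddings
print(loss(a, b, dissimilar))       # penalizes pairs closer than the margin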
PolyCollection doesn't work
I have a problem with matplotlib's PolyCollection when working with Python 2.5. In random mode, it shows me the following error: array dimensions must agree except for d_0 (file: collection.py - xy = np.concatenate([xy, np.zeros((1,2))])). This is my code:

from mpl_toolkits.mplot3d import Axes3D
from matplotlib.collections import PolyCollection
from matplotlib.colors import colorConverter
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.font_manager as fm
from matplotlib.patches import Rectangle
import matplotlib.cm as cm

colors = ['#be1e2d', '#666699', '#92d5ea', '#ee8310', '#8d10ee', '#5a3b16', '#26a4ed', '#f45a90', '#e9e744']
row_names = ['2005','2006','2007']
data = [[1,1,1,1,1,1],[2,2,2,2,2,2],[4,4,4,4,4,4],[5,5,5,5,5,5],[7,7,7,7,7,7],[8,8,8,8,8,8]]
column_names = ['Ri','Pe']

# add 0 at the start and end of each list
i = 0
for i in range(len(data)):
    data[i].append(0)
for i in range(len(data)):
    data[i].insert(0, 0)

dpi = 50.0
width = 460
height = 440
fig = plt.figure(1, figsize=(width/dpi, height/dpi), facecolor='w')
ax = fig.gca(projection='3d')  #, azim=40, elev=0)

# Build axes
size = len(row_names) * len(data[0])
zs = np.arange(len(data))

# Set the font properties
fp = fm.FontProperties()
fp.set_size('xx-small')

# Build graph
verts = []
step = 1.0/len(data[0])
vertsColor = []

# Check whether there is a single series or not
if len(column_names) > 1:
    idx = 0
    xs = np.arange(0, size, step)
    change_color = len(column_names) - 1
    for z in zs:
        verts.append(zip(xs, data[z]))
        vertsColor.append(colors[idx])
        if idx == change_color:
            idx = 0
        else:
            idx = idx + 1
    ################################################
    # I THINK THE PROBLEM IS HERE
    poly = PolyCollection(verts, facecolors=vertsColor)
    ax.add_collection3d(poly, zs=zs, zdir='y')
    ################################################
    ax.set_ylim3d(0, len(row_names)*len(column_names))
    zs = np.arange(0, len(row_names) * len(column_names), len(column_names))
    ax.set_yticks(zs)
    lim = ((size*step)-step) - (len(row_names) - 1)
    ax.set_xlim3d(0, lim)
    rect = []
    serie = []
    # Build legend
    for i in range(len(column_names)):
        rect.insert(i, Rectangle((0, 0), 1, 1, facecolor=colors[i]))
        serie.insert(i, column_names[i])
    ax.legend((rect), (serie), loc=3, ncol=3, prop=fp)
else:
    xs = np.arange(0, size, step)
    for z in zs:
        verts.append(zip(xs, data[z]))
    poly = PolyCollection(verts, facecolors=colors)  #[:len(data)])
    poly.set_alpha(0.6)
    ax.add_collection3d(poly, zs=zs, zdir='y')
    ax.set_xlabel('Rec')
    lim = ((size*step)-step) - (len(row_names) - 1)
    ax.set_xlim3d(0, lim)
    ax.set_yticks(zs)
    ax.set_ylim3d(0, len(row_names))

# Find max value
max_value = 0
i = 0
for i in data:
    mass = max(i)
    if mass > max_value:
        max_value = mass

# Font for the X, Y, Z labels
for label in ax.get_xticklabels():
    label.set_fontproperties(fp)
for label in ax.get_yticklabels():
    label.set_fontproperties(fp)
for label in ax.get_zticklabels():
    label.set_fontproperties(fp)

ax.set_xticklabels('')
ax.set_ylabel('Years')
ax.set_yticklabels(row_names, fontproperties=fp)
ax.set_zlabel('Values')
ax.set_zlim3d(0, max_value)
ax.set_title('Test', x=0.5, y=1)
plt.show()

Thanks.
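For comparison, a minimal 3D PolyCollection that renders correctly (an editor's hedged sketch against a modern matplotlib, independent of the Python 2.5 setup above):

from matplotlib.collections import PolyCollection
import matplotlib.pyplot as plt
import numpy as np

fig = plt.figure()
ax = fig.add_subplot(projection='3d')
xs = np.arange(8)
# each polygon must be a concrete sequence of (x, y) pairs; list(zip(...))
# also guards against zip returning a lazy iterator on Python 3
verts = [list(zip(xs, np.random.rand(len(xs)))) for _ in range(3)]
poly = PolyCollection(verts, facecolors=['#be1e2d', '#666699', '#92d5ea'])
poly.set_alpha(0.6)
ax.add_collection3d(poly, zs=[0, 1, 2], zdir='y')
ax.set_xlim3d(0, 7)
ax.set_ylim3d(-1, 3)
ax.set_zlim3d(0, 1)
plt.show()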