I have been working on attention maps for Swin Transformers. Below is my code implementation:
from PIL import Image
import sys
import torch
from torchvision import transforms
import numpy as np
import cv2
def rollout(attentions, discard_ratio, head_fusion):
result = torch.eye(attentions[0].size(-1))
with torch.no_grad():
for attention in attentions:
# print(attentions)
if head_fusion == "mean":
attention_heads_fused = attention.mean(axis=1)
elif head_fusion == "max":
attention_heads_fused = attention.max(axis=1)[0]
elif head_fusion == "min":
attention_heads_fused = attention.min(axis=1)[0]
else:
raise "Attention head fusion type Not supported"
# Drop the lowest attentions, but
# don't drop the class token
flat = attention_heads_fused.view(attention_heads_fused.size(0), -1)
# print(flat)
_, indices = flat.topk(int(flat.size(-1)*discard_ratio), -1, False)
# print("_ : ",_," indices : ",indices)
indices = indices[indices != 0]
flat[0, indices] = 0
I = torch.eye(attention_heads_fused.size(-1))
# print("I : ",I)
a = (attention_heads_fused + 1.0*I)/2
# print("a : ",a)
# print(a.size())
print(a.sum(dim=-1))
a = a / a.sum(dim=-1)
result = torch.matmul(a, result)
# print("result : ",result)
# Look at the total attention between the class token,
# and the image patches
mask = result[0, 0 , 1 :]
# In case of 224x224 image, this brings us from 196 to 14
width = int(mask.size(-1)**0.5)
mask = mask.reshape(width, width).numpy()
mask = mask / np.max(mask)
return mask
class VITAttentionRollout:
def __init__(self, model, attention_layer_name='dropout', head_fusion="mean",
discard_ratio=0.9):
self.model = model
self.head_fusion = head_fusion
self.discard_ratio = discard_ratio
# print(self.model.named_modules())
for name, module in self.model.named_modules():
# print("Name : ",name," Module : ",module)
if attention_layer_name in name:
module.register_forward_hook(self.get_attention)
# print(self.attentions)
self.attentions = []
def get_attention(self, module, input, output):
self.attentions.append(output.cpu())
def __call__(self, input_tensor):
self.attentions = []
with torch.no_grad():
output = self.model(**input_tensor)
# print(output)
return rollout(self.attentions, self.discard_ratio, self.head_fusion)
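For reference, the recursion in rollout() implements attention rollout: each layer's head-fused attention is averaged with the identity (to model the residual connection), row-normalized, and multiplied into the running result. A minimal self-contained sketch of that recursion on random tensors (shapes here are purely illustrative, not taken from my model):

import torch

# 3 layers, batch 1, 4 heads, 5 tokens (1 class token + 4 patches)
torch.manual_seed(0)
attentions = [torch.softmax(torch.randn(1, 4, 5, 5), dim=-1) for _ in range(3)]

result = torch.eye(5)
for attention in attentions:
    fused = attention.mean(dim=1)            # fuse heads -> (1, 5, 5)
    a = (fused + torch.eye(5)) / 2           # add the residual connection
    a = a / a.sum(dim=-1, keepdim=True)      # re-normalize rows
    result = torch.matmul(a, result)

# Row 0 of the result is the class token's accumulated attention over all tokens.
print(result[0, 0, 1:])                      # attention to the 4 "patches"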
This is the main program
import sys
import torch
from PIL import Image
from torchvision import transforms
import numpy as np
import cv2
from google.colab.patches import cv2_imshow
# from vit_rollout import VITAttentionRollout
from vit_grad_rollout import VITAttentionGradRollout
def show_mask_on_image(img, mask):
img = np.float32(img) / 255
heatmap = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET)
heatmap = np.float32(heatmap) / 255
cam = heatmap + np.float32(img)
cam = cam / np.max(cam)
return np.uint8(255 * cam)
if __name__ == '__main__':
model.eval()
image_path = '/content/both.jpg'
category_index = None
head_fusion = 'max'
discard_ratio = 0.9
transform = transforms.Compose([
transforms.Resize((224, 224)),
transforms.ToTensor(),
transforms.Normalize(mean=[0.5,0.5,0.5], std=[0.5,0.5,0.5]),
])
img = Image.open(image_path)
img = img.resize((224, 224))
input_tensor = feature_extractor(img, return_tensors="pt")
#print(input_tensor)
if category_index is None:
print("Doing Attention Rollout")
attention_rollout = VITAttentionRollout(model, head_fusion=head_fusion,
discard_ratio=discard_ratio)
mask = attention_rollout(input_tensor)
name = "attention_rollout_{:.3f}_{}.png".format(discard_ratio, head_fusion)
else:
print("Doing Gradient Attention Rollout")
grad_rollout = VITAttentionGradRollout(model, discard_ratio=discard_ratio)
mask = grad_rollout(input_tensor, category_index)
name = "grad_rollout_{}_{:.3f}_{}.png".format(category_index,
discard_ratio, head_fusion)
np_img = np.array(img)[:, :, ::-1]
mask = cv2.resize(mask, (np_img.shape[1], np_img.shape[0]))
mask = show_mask_on_image(np_img, mask)
cv2_imshow(np_img)
cv2_imshow(mask)
cv2.imwrite("input.jpg",np_img)
cv2.imwrite(name, mask)
cv2.waitKey(-1)
I am referring to the GitHub project https://github.com/jacobgil/vit-explain
But I am getting the following error: RuntimeError: The size of tensor a (49) must match the size of tensor b (64) at non-singleton dimension 1
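For reference, a rough diagnostic that shows what such hooks actually capture in a Swin model (this assumes a timm swin_tiny_patch4_window7_224 checkpoint and its attn_drop module names; the implementation I am using may name the modules differently):

import timm
import torch

model = timm.create_model("swin_tiny_patch4_window7_224", pretrained=False)
model.eval()

shapes = []
def record_shape(module, inputs, output):
    shapes.append(tuple(output.shape))

for name, module in model.named_modules():
    if name.endswith("attn_drop"):
        module.register_forward_hook(record_shape)

with torch.no_grad():
    model(torch.randn(1, 3, 224, 224))

# Swin attention is windowed: each captured tensor has shape
# (num_windows * batch, num_heads, window_tokens, window_tokens), the number of
# windows changes at every stage, and there is no class token, so the ViT-style
# rollout above (fixed token count plus a CLS token) cannot be applied directly.
for s in shapes[:4]:
    print(s)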
I have looked at a few GitHub projects, but there is very little information on Swin Transformers. Is there any way I can build an attention map for Swin Transformer models?
Please help with it.
Thanks in advance.
I am testing the performance of TPUEmbedding on a GCP TPU v3 using a single TPU core.
I found that I can only get around 1-2 GB/s of memory bandwidth. This is very low
compared with the specification (900 GB/s). I am wondering what's wrong with the code.
This is using tensorflow '2.3.0-dev20200620'.
To run the code, you need to set the environment variable TPU_IP.
import time
import tensorflow as tf
import itertools
import numpy as np
import os
import sys
from tensorflow.python.ops import init_ops_v2
from tensorflow.python.tpu import tpu_embedding_v2
from tensorflow.python.tpu import tpu_embedding_v2_utils
from tensorflow.python.distribute import tpu_strategy
from tensorflow.python.tpu import tpu_strategy_util
from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver
from tensorflow.python.eager import def_function
from tensorflow.python.eager import remote
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.ops import array_ops
from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.distribute import distribute_lib
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.util import nest
batch = 16384
nnz = 30
em = 128
features = 1000000
feature_watched_values = np.random.randint(0, features, (batch * nnz * 1, ))
batch_size = batch * nnz
resolver = None
table_test = tpu_embedding_v2_utils.TableConfig(
vocabulary_size=features,
dim=em,
initializer=None,
combiner='sum',
name='test')
feature_config = (
tpu_embedding_v2_utils.FeatureConfig(
table=table_test, name='watched'))
def get_strategy():
resolver = tpu_cluster_resolver.TPUClusterResolver(tpu="grpc://"+os.environ["TPU_IP"])
remote.connect_to_cluster(resolver)
topology = tpu_strategy_util.initialize_tpu_system(resolver)
device_assignment = tf.python.tpu.device_assignment.DeviceAssignment.build(topology,computation_shape=[1, 1, 1, 1],num_replicas=1)
return tpu_strategy.TPUStrategy(resolver, device_assignment=device_assignment)
def create_strategy_and_mid_level():
strategy = get_strategy()
with strategy.scope():
optimizer = tpu_embedding_v2_utils.SGD(learning_rate=0.1)
embedding = tpu_embedding_v2.TPUEmbedding(
feature_config=feature_config,
batch_size=batch_size,
optimizer=optimizer)
return strategy, embedding, optimizer
strategy, embedding, optimizer = create_strategy_and_mid_level()
training = False
def create_dense_input_fn(strategy, include_weights=False, weight=0.5):
def input_fn(ctx):
del ctx
features = (feature_watched_values)
return dataset_ops.DatasetV2.from_tensor_slices(features).repeat().batch(batch_size)
return input_fn
def get_replica_numpy(structured, strategy, replica_id):
def select_replica(x):
x = strategy.experimental_local_results(x)
if len(x) == 1:
return x
return x[replica_id]
return nest.map_structure(select_replica, structured)
input_fn = create_dense_input_fn(strategy)
dist = strategy.experimental_distribute_datasets_from_function(
input_fn,
options=distribute_lib.InputOptions(
experimental_prefetch_to_device=False))
dist_iter = iter(dist)
# #def_function.function
#tf.function
def test_fn():
def step():
print("In STEPs")
activation = embedding.dequeue()
shard0 = get_replica_numpy(activation, strategy, 0)
res = tf.math.reduce_sum(tf.reshape(shard0[0], [batch, nnz, em]), axis=1)
print("RES device : ", res.device)
return res
embedding.enqueue(next(dist_iter), training=False)
return strategy.run(step)
def test_dense_lookup():
steps = 4
warmups = 1
start = time.time()
for i in range(0, steps+warmups):
res = test_fn()
end0 = time.time()
res.numpy()
end = time.time()
total_bytes = batch * nnz * em * tf.float32.size
print("Test batch = ", batch, " nnz = ", nnz, ", em = ", em)
print(" RES shape: ", res.shape)
print("Whole loop time is : ", end0 - start, end - start)
print("TPU: total bytes {0}, mem bw {1:.3f} GB/s".format(total_bytes, total_bytes*1.0*steps/(end - start)/1.0e9))
test_dense_lookup()
print("done")
I'm trying to deploy a simple ML model on SageMaker to get the hang of it, and I am not having any luck because I get the following error:
ValueError: could not convert string to float: '6.320000000000000097e-03 1.800000000000000000e+01 2.310000000000000053e+00 0.000000000000000000e+00 5.380000000000000338e-01 6.575000000000000178e+00 6.520000000000000284e+01 4.089999999999999858e+00 1.000000000000000000e+00 2.960000000000000000e+02 1.530000000000000071e+01 3.968999999999999773e+02 4.980000000000000426e+00 2.400000000000000000e+01'
This is the first row of my dataframe.
This is the code in my notebook that I'm using right now:
from sagemaker import get_execution_role, Session
from sagemaker.sklearn.estimator import SKLearn
work_dir = 'data'
session = Session()
role = get_execution_role()
train_input = session.upload_data('data')
script = 'boston_housing_prep.py'
model = SKLearn(
entry_point = script,
train_instance_type = 'ml.c4.xlarge',
role = role,
sagemaker_session = session,
hyperparameters = {'alpha': 10}
)
model.fit({'train': train_input})
My script for boston_housing_prep.py looks like this:
import argparse
import pandas as pd
import os
from sklearn.linear_model import Ridge
from sklearn.externals import joblib
from sklearn.preprocessing import StandardScaler
import numpy as np
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--alpha', type=int, default=1)
parser.add_argument('--output-data-dir', type=str, default=os.environ['SM_OUTPUT_DATA_DIR'])
parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
parser.add_argument('--train', type=str, default=os.environ['SM_CHANNEL_TRAIN'])
args = parser.parse_args()
input_files = [ os.path.join(args.train, file) for file in os.listdir(args.train) ]
if len(input_files) == 0:
raise ValueError(('There are no files in {}.\n' +
'This usually indicates that the channel ({}) was incorrectly specified,\n' +
'the data specification in S3 was incorrectly specified or the role specified\n' +
'does not have permission to access the data.').format(args.train, "train"))
raw_data = [ pd.read_csv(file, header=None, engine="python") for file in input_files ]
df = pd.concat(raw_data)
y_train = df.iloc[:, -1]
X_train = df.iloc[:, :5]
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
alpha = args.alpha
clf = Ridge(alpha=alpha)
clf = clf.fit(X_train, y_train)
joblib.dump(clf, os.path.join(args.model_dir, "model.joblib"))
def model_fn(model_dir):
clf = joblib.load(os.path.join(model_dir, "model.joblib"))
return clf
The line that's giving the problem is this one:
X_train = scaler.fit_transform(X_train)
I tried df = df.astype(np.float) after I loaded in the df, but that didn't work either.
This file loads in without a problem when I'm not in SageMaker.
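For what it's worth, the string in the error message is an entire whitespace-separated row being parsed as a single field; a minimal local sketch of what I believe is happening (the values are copied from the error and truncated for brevity):

import io
import pandas as pd

row = ("6.320000000000000097e-03 1.800000000000000000e+01 "
       "2.310000000000000053e+00 0.000000000000000000e+00")

# With the default comma separator the whole row lands in one object column,
# which StandardScaler cannot convert to float.
as_csv = pd.read_csv(io.StringIO(row), header=None)
print(as_csv.dtypes)

# Telling pandas the data is whitespace-delimited parses it into float columns.
as_whitespace = pd.read_csv(io.StringIO(row), header=None, sep=r"\s+")
print(as_whitespace.dtypes)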
I have multiple files in TXT format. How do I read all of them and merge the values into a single output file, passing the file names as command-line arguments, using pandas?
like this:
python3 file1.txt file2.txt file3.txt
Code:
import pandas as pd
import socket, struct
import os
import glob
import sys
try:
file = sys.argv[1]
except Exception:
print("Usage: python3 {} [file]".format(sys.argv[0]))
sys.exit()
os.chdir('/Users/roc/Desktop/js/projj')
fileList = glob.glob('*.txt')
appended_data = []
for file in fileList:
pdd = pd.read_csv(file,header=None,sep='|',error_bad_lines=False, warn_bad_lines=False,skiprows=[0],names=['Name','Code','Ipv', 'Ip','Range','Date', 'Category'],low_memory=False)
df = pdd[pdd['Ipv'].str.contains("ipv4") & pdd['Ip'].str.contains('[0-9]')]
appended_data.append(df)
appended_data = pd.concat(appended_data)
df = pd.DataFrame(appended_data)
pd.options.mode.chained_assignment = None
def ip2int(ip):
packedIP = socket.inet_aton(ip)
return struct.unpack("!L", packedIP)[0]
df['Ip'] = df.Ip.apply(ip2int)
df['Range'] = df.groupby(['Code'])['Range'].transform('sum').fillna(0).astype(int)
k = df[['Ip', 'Range', 'Code']].dropna()
df2 = k.drop_duplicates(subset=['Range'])
result_df =df2.sort_values('Range', ascending=True)
print(result_df.to_csv("/Users/roc/Desktop/js/projj/delegated2.txt",sep=' ', index=False, header=False))
Use the code below to iterate through a folder and append all the files into a single dataframe:
import os
import glob
import pandas as pd

os.chdir('C:\\path_to_folder\\')
FileList = glob.glob('*.txt')
appended_data = []
for file in FileList:
pdd = pd.read_csv(file,header=None,sep='|',error_bad_lines=False, warn_bad_lines=False,skiprows=[0],names=['Name','Code','Ipv', 'Ip','Range','Date', 'Category'],low_memory=False)
df = pdd[pdd['Ipv'].str.contains("ipv4") & pdd['Ip'].str.contains('[0-9]')]
appended_data.append(df)
appended_data = pd.concat(appended_data)
df = pd.DataFrame(appended_data)
Once you have the df, which combines the data from all the files, use the next part of the code:
import socket, struct

pd.options.mode.chained_assignment = None
def ip2int(ip):
packedIP = socket.inet_aton(ip)
return struct.unpack("!L", packedIP)[0]
df['Ip'] = df.Ip.apply(ip2int)
df['Range'] = df.groupby(['Code'])['Range'].transform('sum').fillna(0).astype(int)
k = df[['Ip', 'Range', 'Code']].dropna()
df2 = k.drop_duplicates(subset=['Range'])
result_df =df2.sort_values('Range', ascending=True)
result_df.to_csv("/Users/roc/Desktop/output.txt",sep=' ', index=False, header=False)
For some reason, I'm not able to detect multiple faces at a time; it only detects one face at a time. How do I resolve this issue? I've added the code below. I've used Google's FaceNet for real-time face recognition.
In the video output it draws a bounding box on only one face at a time, but the console output counts two or more faces present.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from scipy import misc
import cv2
import matplotlib.pyplot as plt
import numpy as np
import argparse
import facenet
import detect_face
import os
from os.path import join as pjoin
import sys
import time
import copy
import math
import pickle
from sklearn.svm import SVC
from sklearn.externals import joblib
#addded
#import reload
#reload(sys)
#sys.setdefaultencoding('utf8')
print('Creating networks and loading parameters')
with tf.Graph().as_default():
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
log_device_placement=False))
with sess.as_default():
pnet, rnet, onet = detect_face.create_mtcnn(sess, './') #face detection
minsize = 20 # minimum size of face #minsize, threshold, factor used for detection
threshold = [0.6, 0.7, 0.7] # three steps's threshold
factor = 0.709 # scale factor
margin = 44
frame_interval = 3
batch_size = 1000
image_size = 182
input_image_size = 160
items = os.listdir("/Aryabhatta Robotics Internship/facenet-master/Real_time_face/ids/aligned")
#HumanNames = []
#for names in items:
#HumanNames.append(names)
#print(HumanNames)
#HumanNames = ['Alok','Siddhant','tesra','s01','s02','s03','s04','s05','s06','s07','s08','s09','s10','s11','s12','s13','s14','s15','s16','s17','s18','s19','s20'] #train human name, known face names
print('Loading feature extraction model')
modeldir = '/Aryabhatta Robotics Internship/facenet-master/Real_time_face/models/20180402-114759/20180402-114759.pb' #feature extraction mmodel
facenet.load_model(modeldir)
images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
embedding_size = embeddings.get_shape()[1]
classifier_filename = '/Aryabhatta Robotics Internship/facenet-master/Real_time_face/models/my_classifier/my_classifier.pkl' #out own classifier
classifier_filename_exp = os.path.expanduser(classifier_filename)
with open(classifier_filename_exp, 'rb') as infile:
(model, class_names) = pickle.load(infile)#, encoding='latin1')
print('load classifier file-> %s' % classifier_filename_exp)
video_capture = cv2.VideoCapture(0)
c = 0
# #video writer
# fourcc = cv2.VideoWriter_fourcc(*'DIVX')
# out = cv2.VideoWriter('3F_0726.avi', fourcc, fps=30, frameSize=(640,480))
print('Start Recognition!')
prevTime = 0
while True: #infinite loop
ret, frame = video_capture.read() #video capture from webcam
frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5) #resize frame (optional)
curTime = time.time() # calc fps
timeF = frame_interval
if (c % timeF == 0):
find_results = []
if frame.ndim == 2:
frame = facenet.to_rgb(frame)
frame = frame[:, :, 0:3]
bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor)
nrof_faces = bounding_boxes.shape[0]
print('Detected_FaceNum: %d' % nrof_faces)
if nrof_faces > 0:
det = bounding_boxes[:, 0:4]
img_size = np.asarray(frame.shape)[0:2]
cropped = []
scaled = []
scaled_reshape = []
bb = np.zeros((nrof_faces,4), dtype=np.int32)
for i in range(nrof_faces):
print("faceno:" + str(i))
emb_array = np.zeros((1, embedding_size))
bb[i][0] = det[i][0]
bb[i][1] = det[i][1]
bb[i][2] = det[i][2]
bb[i][3] = det[i][3]
# inner exception
if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
print('face is inner of range!')
continue
cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
cropped[0] = facenet.flip(cropped[0], False)
scaled.append(misc.imresize(cropped[0], (image_size, image_size), interp='bilinear'))
scaled[0] = cv2.resize(scaled[0], (input_image_size,input_image_size),
interpolation=cv2.INTER_CUBIC)
scaled[0] = facenet.prewhiten(scaled[0])
scaled_reshape.append(scaled[0].reshape(-1,input_image_size,input_image_size,3))
feed_dict = {images_placeholder: scaled_reshape[0], phase_train_placeholder: False}
emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
#print(emb_array)
threshold_accuracy = 155
predictions = model.predict_proba(emb_array)
#print(predictions)
for i in range(len(predictions[0])):
predictions[0][i] = np.exp(18*predictions[0][i])
#print(predictions)
best_class_indices = np.argmax(predictions, axis=1)
print(best_class_indices)
print("next")
best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
print(best_class_probabilities)
for i in range(len(best_class_indices)):
print('%4d %s: %.3f' % (i, class_names[best_class_indices[i]], best_class_probabilities[i]))
cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2) #boxing face
#plot result idx under box
text_x = bb[i][0]
text_y = bb[i][3] + 20
# print('result: ', best_class_indices[0])
if best_class_probabilities[i] > threshold_accuracy :
#result_names = HumanNames[best_class_indices[0]]
cv2.putText(frame, class_names[best_class_indices[i]], (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
1, (0, 0, 255), thickness=1, lineType=2)
else:
cv2.putText(frame, 'Unknown', (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
1, (0, 0, 255), thickness=1, lineType=2)
#for H_i in HumanNames:
#if HumanNames[best_class_indices[0]] == H_i and best_class_probabilities[0] > threshold_accuracy :
#flag = 1
#result_names = HumanNames[best_class_indices[0]]
#cv2.putText(frame, result_names, (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
#1, (0, 0, 255), thickness=1, lineType=2)
#else:
#cv2.putText(frame, 'Unknown', (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
# 1, (0, 0, 255), thickness=1, lineType=2)
else:
print('Unable to align')
sec = curTime - prevTime
prevTime = curTime
fps = 1 / (sec)
str1 = 'FPS: %2.3f' % fps
text_fps_x = len(frame[0]) - 150
text_fps_y = 20
cv2.putText(frame, str1, (text_fps_x, text_fps_y),
cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 0), thickness=1, lineType=2)
# c+=1
cv2.imshow('Video', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
video_capture.release()
# #video writer
# out.release()
cv2.destroyAllWindows()
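To double-check that detection itself returns several boxes (so the problem is only in the later cropping/classification/drawing stage), one thing I can do is draw every row of bounding_boxes right after detect_face.detect_face, before anything else. A small helper sketch (the box layout follows how det = bounding_boxes[:, 0:4] is already used above):

import cv2
import numpy as np

def draw_all_detections(frame, bounding_boxes):
    # The first four columns of each MTCNN row are x1, y1, x2, y2.
    for box in np.asarray(bounding_boxes):
        x1, y1, x2, y2 = [int(v) for v in box[:4]]
        cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
    return frame

# e.g. right after: bounding_boxes, _ = detect_face.detect_face(...)
# frame = draw_all_detections(frame, bounding_boxes)
# cv2.imshow('raw detections', frame)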
Threshold accuracy should be between 0 and 1. Make sure your threshold accuracy is greater than 0.60.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from scipy import misc
from skimage.transform import resize
import cv2
import numpy as np
import facenet
import detect_face
import os
import time
import pickle
import sys
img_path='download.jpeg'
modeldir = './model/20170511-185253.pb'
classifier_filename = './class/classifier.pkl'
npy='./npy'
train_img="./train_img"
with tf.Graph().as_default():
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
with sess.as_default():
pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)
minsize = 10 # minimum size of face
threshold = [0.6, 0.7, 0.7] # three steps's threshold
factor = 1 # scale factor
margin = 44
frame_interval = 3
batch_size = 1000
image_size = 182
input_image_size = 160
HumanNames = os.listdir(train_img)
HumanNames.sort()
print('Loading feature extraction model')
facenet.load_model(modeldir)
images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
embedding_size = embeddings.get_shape()[1]
classifier_filename_exp = os.path.expanduser(classifier_filename)
with open(classifier_filename_exp, 'rb') as infile:
(model, class_names) = pickle.load(infile)
# video_capture = cv2.VideoCapture("akshay_mov.mp4")
c = 0
print('Start Recognition!')
prevTime = 0
# ret, frame = video_capture.read()
frame = cv2.imread(img_path,0)
frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5) #resize frame (optional)
curTime = time.time()+1 # calc fps
timeF = frame_interval
if (c % timeF == 0):
find_results = []
if frame.ndim == 2:
frame = facenet.to_rgb(frame)
frame = frame[:, :, 0:3]
print(1)
bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor)
nrof_faces = bounding_boxes.shape[0]
print('Face Detected: %d' % nrof_faces)
if nrof_faces > 0:
det = bounding_boxes[:, 0:4]
img_size = np.asarray(frame.shape)[0:2]
cropped = []
scaled = []
scaled_reshape = []
bb = np.zeros((nrof_faces,4), dtype=np.int32)
for i in range(nrof_faces):
emb_array = np.zeros((1, embedding_size))
bb[i][0] = det[i][0]
bb[i][1] = det[i][1]
bb[i][2] = det[i][2]
bb[i][3] = det[i][3]
# inner exception
if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
print('face is too close')
continue
cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
cropped[i] = facenet.flip(cropped[i], False)
scaled.append(resize(cropped[i], (image_size, image_size), anti_aliasing=True))
scaled[i] = cv2.resize(scaled[i], (input_image_size,input_image_size),
interpolation=cv2.INTER_CUBIC)
scaled[i] = facenet.prewhiten(scaled[i])
scaled_reshape.append(scaled[i].reshape(-1,input_image_size,input_image_size,3))
feed_dict = {images_placeholder: scaled_reshape[i], phase_train_placeholder: False}
emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
predictions = model.predict_proba(emb_array)
print(predictions)
best_class_indices = np.argmax(predictions, axis=1)
# print(best_class_indices)
best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
print(best_class_probabilities)
cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2) #boxing face
#plot result idx under box
text_x = bb[i][0]
text_y = bb[i][3] + 20
print('Result Indices: ', best_class_indices[0])
print(HumanNames)
for H_i in HumanNames:
# print(H_i)
if HumanNames[best_class_indices[0]] == H_i:
result_names = HumanNames[best_class_indices[0]]
else:
print('Unable to align')
cv2.imshow('Image', frame)
if cv2.waitKey(100) & 0xFF == ord('q'):
sys.exit("Thanks")
cv2.destroyAllWindows()