How can I increase the resolution of my input video for my real time Object Detection API? - tensorflow

I am having trouble increasing the resolution in my input video for real time object detection. I have tried increasing the input resolution of the video read in, but the output gives me an error.
I am streaming the video using a webcam program
The only dimensions that work are 640 x 480. Anything bigger gives me an output error. The resolution will only stay 1920 x 1080 until before the frame window, then it switches back to 640 x 480 and gives me an error when the video saves.
import numpy as np
import tensorflow as tf
from object_detection.utils import visualization_utils as vis_util
import cv2 as cv
from time import time
import serial
refPoints = []
# draw ROI
def image_crop(event, x, y, flags, param):
global refPoints
if event == cv.EVENT_LBUTTONDOWN:
refPoints = [(x, y)]
elif event == cv.EVENT_LBUTTONUP:
refPoints.append((x, y))
# run inference on single image
def run_inference_for_single_image(image, graph, sess):
with graph.as_default():
boxes = tf.get_default_graph().get_tensor_by_name('detection_boxes:0')
scores = tf.get_default_graph().get_tensor_by_name('detection_scores:0')
classes = tf.get_default_graph().get_tensor_by_name('detection_classes:0')
image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')
rboxes, rscores, rclasses = sess.run([boxes, scores, classes], feed_dict={image_tensor: np.expand_dims(image, 0)})
return rboxes[0], rscores[0], rclasses[0]
def main():
# 0 - load model
PATH_TO_MODEL = 'C:\\frozen_inference_graph.pb'
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.gfile.GFile(PATH_TO_MODEL, 'rb') as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def, name='')
sess = tf.Session(graph=detection_graph)
# 1 - define video streamer & serial port
streamer = 0
out = cv.VideoWriter('output.avi', cv.VideoWriter_fourcc(*'XVID'), 20, (1920, 1080)) #previously 25.0, (640, 480)
port = 'COM4'
ser = serial.Serial(port, 9600)
# 2 - draw region of interest (ROI)
cap = cv.VideoCapture(cv.CAP_DSHOW)
clone = frame.copy()
cv.namedWindow('frame')
cv.resizeWindow('frame', (1920, 1080))
cv.setMouseCallback('frame', image_crop)
print(frame.shape)#
print('ROI selecting...')
while True:
cv.imshow('frame', frame)
if cv.waitKey(1) & 0xFF == ord('r'):
print('ROI selection reset.')
frame = clone.copy()
elif cv.waitKey(1) & 0xff == ord('c'):
print('ROI selected.')
cap.release()
break
if len(refPoints) == 2:
cv.rectangle(frame, refPoints[0], refPoints[1], (0, 255, 0), 2)
cv.imshow('frame', frame)
if cv.waitKey(0) & 0xFF == ord('q'):
cap.release()
cv.destroyAllWindows()
else:
print('only one ROI allowed.')
cap.release()
cv.destroyAllWindows()
return -1
# 3 - run inferences on ROI
t1 = 0
t2 = 0
t3 = 0
i = 0
cap = cv.VideoCapture(streamer)
cv.namedWindow('frame')
cv.resizeWindow('frame', (1920, 1080))
while(True):
# image reading & cropping
t0 = time()
ret, frame = cap.read()
if frame is None:
break
cropped = frame[refPoints[0][1]:refPoints[1][1], refPoints[0][0]:refPoints[1][0]]
dt = time()-t0
t1 += dt
# image inference
t0 = time()
image = cropped.copy()
boxes, scores, classes = run_inference_for_single_image(image, detection_graph, sess)
boxes = boxes[scores>0.95]
classes = classes[scores>0.95]
boxes1 = boxes[classes==1]
boxes2 = boxes[classes==2]
dt = time()-t0
t2 += dt
t0 = time()
cv.rectangle(frame, refPoints[0], refPoints[1], (0, 255, 0), 2)
if len(boxes1):
#vis_util.draw_bounding_boxes_on_image_array(image, boxes)
cv.rectangle(frame, refPoints[0], refPoints[1], (0, 0, 255), 4)
ser.write(b'1')
else:
ser.write(b'0')
#if len(boxes2):
# cv.rectangle(frame, refPoints[0], refPoints[1], (255, 0, 0), 4)
#print(frame.shape)
#print(frame.shape)#
cv.imshow('frame', frame)
out.write(frame)
dt = time()-t0
t3 += dt
i += 1
if cv.waitKey(1) & 0xFF == ord('q'):
break
print("image reading: average %f sec/frame"%(t1/i))
print("image processing: average %f sec/frame"%(t2/i))
print("image showing/saving: average %f sec/frame"%(t3/i))
ser.close()
sess.close()
cap.release()
out.release()
cv.destroyAllWindows()
if __name__ == '__main__':
main()

Related

how to pass continuous video stream to pyqt5 using Qthread correctly for face recognition?

I want to pass continuous video stream to pyqt5 qlabel named label_cam to show up in ui using QThread but it keep failed showing the video stream on the qlabel. The video stream is later aimed to recognize people out.
I have been trying to connect the signal "change_Pixmap" from the VideoThread class to the "set_image" function in the "label_cam" object but i guess the flow of code or variable assign is wrong. Below is my code.
class FaceRecogScreen(QDialog):
def init(self):
super(FaceRecogScreen, self).init()
uic.loadUi("face_recog.ui", self)
self.update_date_time()
self.pushButton_back.clicked.connect(self.back_to_main3)
self.load_model()
def load_model(self):
self.prototxt = "deploy.prototxt.txt"
self.model = "res10_300x300_ssd_iter_140000.caffemodel"
print("[INFORMATION] Loading model....")
self.net = cv2.dnn.readNetFromCaffe(self.prototxt, self.model)
weight = "facenet_keras_weights.h5"
self.model2 = load_model('FaceNetModel.h5')
self.model2.load_weights(weight)
self.collected_encodings = pickle.loads(open('face_encoding.pickle', "rb").read())
infile = open('face_encoding', 'rb')
data = pickle.load(infile)
self.knownEncodings, self.knownNames = data['encodings'], data['names']
self.knownEncodings = np.array(self.knownEncodings)
self.knownNames = np.array(self.knownNames)
self.clf = svm.SVC(gamma="scale", probability=True, tol=0.01)
self.clf.fit(self.knownEncodings, self.knownNames)
# self.label_cam= VideoLabel()
self.thread = VideoThread(self)
self.thread.change_Pixmap.connect(self.set_image)
# call the run() function in VideoThread class
self.thread.start()
# self.thread.change_Pixmap.connect(self.label_cam.set_image)
# # call the run() function in VideoThread class
# self.thread.start()
# layout = self.layout()
# layout.addWidget(self.label_cam)
# self.thread.run.start()
def update_date_time(self):
# Get the current date and time
date_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
self.label_datetime.setText(date_time)
# Update the date and time in the table
def back_to_main3(self):
pushButton_back = WelcomeScreen()
widget.addWidget(pushButton_back)
widget.setCurrentIndex(widget.currentIndex()+1)
def set_image(self, frame):
self.setPixmap(QPixmap.fromImage(frame))
class VideoThread(QtCore.QThread):
change_Pixmap = QtCore.pyqtSignal(QtGui.QImage)
def run(self):
cap = cv2.VideoCapture(1)
while True:
ret, frame = cap.read()
if ret:
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
h, w, ch = frame.shape
bytesPerLine = ch * w
convertToQtFormat = QtGui.QImage(frame.data, w, h, bytesPerLine, QtGui.QImage.Format_RGB888)
p = convertToQtFormat.scaled(640, 480, QtCore.Qt.KeepAspectRatio)
self.change_Pixmap.emit(p)
(h, w) = frame.shape[:2]
blob = cv2.dnn.blobFromImage(cv2.resize(
frame, (160, 160)), 1.0, (300, 300), (104, 177, 123))
self.net.setInput(blob)
detections = self.net.forward()
self.frame = frame
# self.frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
cv2.normalize(frame, None, 0, 1.0, cv2.NORM_MINMAX, dtype=cv2.CV_32F)
pixels = np.expand_dims(frame, axis=0)
encode = self.model2.predict(pixels)
face_name = []
for encoding in encode:
name = self.clf.predict([encoding])
face_name.extend(name)
for i in range(0, detections.shape[2]):
confidence = detections[0, 0, i, 2]
if confidence < 0.5:
continue
box = detections[0, 0, i, 3:7]*np.array([w, h, w, h])
(startX, startY, endX, endY) = box.astype("int")
text = "{:.2f}%".format(confidence*100)
y = startY - 10 if startY - 10 > 10 else startY*10
if name == 'unknown':
cv2.rectangle(frame, (startX, y), (endX, endY), (0, 0, 255), 2)
cv2.putText(frame, name, (startX, startY),
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
else:
cv2.rectangle(frame, (startX, y), (endX, endY), (0, 255, 0), 2)
cv2.putText(frame, name[0], (startX, startY),
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

How to use trained yolov7 best.pt file as "vehicle plate detection" application in python?

I have trained a model using yolov7 https://colab.research.google.com/drive/1X9A8odmK4k6l26NDviiT6dd6TgR-piOa#scrollTo=nD-uPyQ_2jiN colab file. After training I got a file named best.pt and I want to use it as a service/application in python. There is a yolov3 example which I created. This one was using .weights so how can I use .pt?
import cv2
import numpy as np
import time
import os
def getPhoto():
ROOT_DIR = os.path.dirname(__file__)
URL = "http://192.168.1.3:4747/video"
PC_CAM = 0
net = cv2.dnn.readNet(
f"{ROOT_DIR}\\yolov3_custom_final.weights",
f"{ROOT_DIR}\\yolov3_custom.cfg",
)
classes = []
with open(f"{ROOT_DIR}\\classes.txt", "r") as f:
classes = f.read().splitlines()
timeElapsed = 0
wCam, hCam = 640, 360
font = cv2.FONT_HERSHEY_PLAIN
colors = np.random.uniform(0, 255, size=(2, 3))
cap = cv2.VideoCapture(URL)
cap.set(3, wCam)
cap.set(4, hCam)
while True:
_, img = cap.read()
cv2.imshow("Detection Screen", img)
if cv2.waitKey(1) == ord("c"):
break
height, width, _ = img.shape
blob = cv2.dnn.blobFromImage(
img, 1 / 255, (416, 416), (0, 0, 0), swapRB=True, crop=False
)
net.setInput(blob)
output_layers_names = net.getUnconnectedOutLayersNames()
layerOutputs = net.forward(output_layers_names)
boxes = []
confidences = []
class_ids = []
for output in layerOutputs:
for detection in output:
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > 0.6:
center_x = int(detection[0] * width)
center_y = int(detection[1] * height)
w = int(detection[2] * width)
h = int(detection[3] * height)
x = int(center_x - w / 2)
y = int(center_y - h / 2)
boxes.append([x, y, w, h])
confidences.append((float(confidence)))
class_ids.append(class_id)
indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.4)
if len(indexes) > 0:
for i in indexes.flatten():
x, y, w, h = boxes[i]
label = str(classes[class_ids[i]])
confidence = str(round(confidences[i], 2))
color = colors[i]
cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
cv2.putText(
img,
label + " " + confidence,
(x, y + 20),
font,
2,
(255, 255, 0),
2,
)
cv2.imwrite(f"{ROOT_DIR}\\DetectedPhoto.jpg", img)
print("Image Saved")
getPhoto()
I want to detect vehicle plates using .pt file and cut vehicle plates and save them as .jpeg.
you can do this with detect.py insided yolov7 folder. Run this:
python detect.py --weights best.pt --source image.jpg
you can download yolov7 with this method:
git clone https://github.com/WongKinYiu/yolov7.git
in yolov7 you can see detect.py

AttributeError: 'list' object has no attribute 'cuda'

I try to develop Convolution network deep learning for face recognition and right now when i try to run it said 'list' object has no attribute 'cuda' im not sure what went wrong can anyone check. the code below is for train the whole module and below that is for load the data
if name == 'main':
#set_trace()
args = edict({
'operation' : 'train',
'feature_file' : None,
'result_sample_path' : None,
'gpu' : 'GPU',
'path_image_query' : None,
'query_label' : 'Query label',
'dataset' : None,
'specific_dataset_folder_name' : 'lfw',
'img_extension' : 'jpg',
'preprocessing_method' : 'sphereface',
'model_name' : 'mobiface',
'batch_size' : 3,
'image_query':'/content/drive/My Drive/recfaces13/recfaces/datasets/LFW',
'train':True})
# selecting the size of the crop based on the network
if args.model_name == 'mobilefacenet' or args.model_name == 'sphereface':
crop_size = (96, 112)
elif args.model_name == 'mobiface' or args.model_name == 'shufflefacenet':
crop_size = (112, 112)
elif args.model_name == 'openface':
crop_size = (96, 96)
elif args.model_name == 'facenet':
crop_size = (160, 160)
else:
raise NotImplementedError("Model " + args.model_name + " not implemented")
if args.dataset is not None:
# process whole dataset
assert args.specific_dataset_folder_name is not None, 'To process a dataset, ' \
'the flag --specific_dataset_folder_name is required.'
process_dataset(args.operation, args.model_name, args.batch_size,
args.dataset, args.specific_dataset_folder_name,
args.img_extension, args.preprocessing_method, crop_size,
args.result_sample_path, args.feature_file)
#elif args.image_query is not None:
# process unique image
# dataset = ImageDataLoader(args.image_query, args.preprocessing_method,
# crop_size, args.operation == 'extract_features')
# dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=2, drop_last=False)
# features = None
elif args.operation == 'train':
##########set_trace()
net = load_net('mobilefacenet', 'gpu')
net = net.cuda()
model_name=args.model_name
dataset = LFW(args.image_query,args.specific_dataset_folder_name, args.img_extension, args.preprocessing_method, crop_size, args.train)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=2, drop_last=False)
# data_counter_per_class = np.zeros((len(dataloader)))
# for i in range(len(dataloader)):
# path = os.path.join('image_query', dataloader[i])
# files = get_files_from_folder(path)
# data_counter_per_class[i] = len(files)
# test_counter = np.round(data_counter_per_class * (1 - train_ratio))
#dataloader1=dataloader.split(',')
#train,test=train_test_split(dataloader,test_size=0.2)
#trainloader = torch.utils.data.DataLoader(dataset, batch_size=4, shuffle=True, num_workers=2, drop_last=False)
# testloader = torch.utils.data.DataLoader(dataset, batch_size=4, shuffle=False, num_workers=2, drop_last=False) //create path//
#create array of data path split that data path and
features = None
if args.feature_file is not None and os.path.isfile(args.feature_file):
features = scipy.io.loadmat(args.feature_file)
epoch = 2
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
train_loss = list()
#set_trace()
for i, data in enumerate(dataloader):
inps, labs = data
inps, labs = inps.cuda(args['device']), labs.cuda(args['device'])
inps.squeeze_(0)
labs.squeeze_(0)
inps = Variable(inps).cuda(args['device'])
labs = Variable(labs).cuda(args['device'])
optimizer.zero_grad()
outs = net(inps)
soft_outs = F.softmax(outs, dim=1)
prds = soft_outs.data.max(1)[1]
loss = criterion(outs, labs)
loss.backward()
optimizer.step()
prds = prds.squeeze_(1).squeeze_(0).cpu().numpy()
inps_np = inps.detach().squeeze(0).cpu().numpy()
labs_np = labs.detach().squeeze(0).cpu().numpy()
train_loss.append(loss.data.item())
print('[epoch %d], [iter %d / %d], [train loss %.5f]' % (epoch, i + 1, len(train_loader), np.asarray(train_loss).mean()))
Dataloader
class LFW(object):
def __init__(self, root, specific_folder, img_extension, preprocessing_method=None, crop_size=(96, 112)):
"""
Dataloader of the LFW dataset.
root: path to the dataset to be used.
specific_folder: specific folder inside the same dataset.
img_extension: extension of the dataset images.
preprocessing_method: string with the name of the preprocessing method.
crop_size: retrieval network specific crop size.
"""
self.preprocessing_method = preprocessing_method
self.crop_size = crop_size
self.imgl_list = []
self.classes = []
self.people = []
self.model_align = None
# read the file with the names and the number of images of each people in the dataset
with open(os.path.join(root, 'people.txt')) as f:
people = f.read().splitlines()[1:]
# get only the people that have more than 20 images
for p in people:
p = p.split('\t')
if len(p) > 1:
if int(p[1]) >= 20:
for num_img in range(1, int(p[1]) + 1):
self.imgl_list.append(os.path.join(root, specific_folder, p[0], p[0] + '_' +
'{:04}'.format(num_img) + '.' + img_extension))
self.classes.append(p[0])
self.people.append(p[0])
le = preprocessing.LabelEncoder()
self.classes = le.fit_transform(self.classes)
print(len(self.imgl_list), len(self.classes), len(self.people))
def __getitem__(self, index):
imgl = imageio.imread(self.imgl_list[index])
cl = self.classes[index]
# if image is grayscale, transform into rgb by repeating the image 3 times
if len(imgl.shape) == 2:
imgl = np.stack([imgl] * 3, 2)
imgl, bb = preprocess(imgl, self.preprocessing_method, crop_size=self.crop_size,
is_processing_dataset=True, return_only_largest_bb=True, execute_default=True)
# append image with its reverse
imglist = [imgl, imgl[:, ::-1, :]]
# normalization
for i in range(len(imglist)):
imglist[i] = (imglist[i] - 127.5) / 128.0
imglist[i] = imglist[i].transpose(2, 0, 1)
imgs = [torch.from_numpy(i).float() for i in imglist]
return imgs, cl, imgl, bb, self.imgl_list[index], self.people[index]
def __len__(self):
return len(self.imgl_list)

Why does OpenCV's Meanshift tracking algorithm only track object the first time?

I am running the meanshift tracking algorithm to track objects in a live stream(with webcam) in OpenCV however the algorithm only works the first time it is run and does not work when I run the program again unless I restart my computer. Why is this so?
Algorithm taken from: https://docs.opencv.org/trunk/db/df8/tutorial_py_meanshift.html
import numpy as np
import cv2
cap = cv2.VideoCapture(0)
# take first frame of the video
ret,frame = cap.read()
# setup initial location of window
r,h,c,w = 250,90,400,125 # simply hardcoded the values
track_window = (c,r,w,h)
# set up the ROI for tracking
roi = frame[r:r+h, c:c+w]
hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
mask = cv2.inRange(hsv_roi, np.array((0., 60.,32.)), np.array((180.,255.,255.)))
roi_hist = cv2.calcHist([hsv_roi],[0],mask,[180],[0,180])
cv2.normalize(roi_hist,roi_hist,0,255,cv2.NORM_MINMAX)
# Setup the termination criteria, either 10 iteration or move by atleast 1 pt
term_crit = ( cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1 )
while(1):
ret ,frame = cap.read()
if ret == True:
hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
dst = cv2.calcBackProject([hsv],[0],roi_hist,[0,180],1)
# apply meanshift to get the new location
ret, track_window = cv2.meanShift(dst, track_window, term_crit)
# Draw it on image
x,y,w,h = track_window
img2 = cv2.rectangle(frame, (x,y), (x+w,y+h), 255,2)
cv2.imshow('img2',img2)
k = cv2.waitKey(60) & 0xff
if k == 27:
break
else:
cv2.imwrite(chr(k)+".jpg",img2)
else:
break
cv2.destroyAllWindows()
cap.release()

Tensorflow : train on mini batch, fast then slow

I am a beginner in tensorflow and I am trying to train a model using "mini batch". To do that I created a generator and iterate it. The problem I encounter is that, at the beginning of the epoch, the train seems fast (many batch per seconds) then the train slow down (1 batch per second) so I am wondering where I am wrong in my code but I do not find the problem.
def prepare_data(filename):
'''load file which give path and label for the data'''
f = open(filename, 'r')
data = [line.split() for line in f]
feat =[]
label=[]
for l in data:
feat.append(l[0])
label.append(l[1])
n_samples = len(feat)
shuf = list(range(n_samples))
random.shuffle(shuf)
count = Counter(label)
print(count)
feature = [feat[i] for i in shuf]
label = np.array(label, dtype=np.int)
return feature, label[shuf]
def get_specgrams(paths, nsamples=16000):
'''
Given list of paths, return specgrams.
'''
# read the wav files
wavs = [wavfile.read(x)[1] for x in paths]
# zero pad the shorter samples and cut off the long ones.
data = []
for wav in wavs:
if wav.size < 16000:
d = np.pad(wav, (nsamples - wav.size, 0), mode='constant')
else:
d = wav[0:nsamples]
data.append(d)
# get the specgram
#specgram = [signal.spectrogram(d, nperseg=256, noverlap=128)[2] for d in data]
#specgram = [s.reshape(129, 124, -1) for s in specgram]
return np.asarray(data)
def get_specgram(path, nsamples=16000):
'''
Given path, return specgrams.
'''
# read the wav files
wav = wavfile.read(path)[1]
# zero pad the shorter samples and cut off the long ones.
if wav.size < 16000:
d = np.pad(wav, (nsamples - wav.size, 0), mode='constant')
else:
d = wav[0:nsamples]
# get the specgram
#specgram = [signal.spectrogram(d, nperseg=256, noverlap=128)[2] for d in data]
#specgram = [s.reshape(129, 124, -1) for s in specgram]
return d
# multci classification binary labels
def one_hot_encode(labels, n_unique_labels=31):
n_labels = len(labels)
#print('number of unique labels:', n_unique_labels)
one_hot_encode = np.zeros((n_labels,n_unique_labels))
one_hot_encode[np.arange(n_labels), labels] = 1
return np.array(one_hot_encode, dtype=np.int)
#create_path_file('train/audio/')
def model(tr_features, tr_labels, ts_features, ts_labels):
# remove gpu device error
config = tf.ConfigProto(allow_soft_placement = True)
# parameters
BATCH_SIZE = 4
number_loop = math.ceil(len(tr_features)/BATCH_SIZE)
training_epochs = 10
n_dim = 16000
n_classes = 31 #len(np.unique(ts_labels))
n_hidden_units_one = 280
n_hidden_units_two = 300
sd = 1 / np.sqrt(n_dim)
learning_rate = 0.1
# get test data
ts_features, ts_labels = get_data(ts_features, ts_labels)
# Model
X = tf.placeholder(tf.float32,[None,n_dim])
Y = tf.placeholder(tf.float32,[None,n_classes])
W_1 = tf.Variable(tf.random_normal([n_dim,n_hidden_units_one], mean = 0, stddev=sd))
b_1 = tf.Variable(tf.random_normal([n_hidden_units_one], mean = 0, stddev=sd))
h_1 = tf.nn.tanh(tf.matmul(X,W_1) + b_1)
W_2 = tf.Variable(tf.random_normal([n_hidden_units_one,n_hidden_units_two], mean = 0, stddev=sd))
b_2 = tf.Variable(tf.random_normal([n_hidden_units_two], mean = 0, stddev=sd))
h_2 = tf.nn.sigmoid(tf.matmul(h_1,W_2) + b_2)
W = tf.Variable(tf.random_normal([n_hidden_units_two,n_classes], mean = 0, stddev=sd))
b = tf.Variable(tf.random_normal([n_classes], mean = 0, stddev=sd))
y_ = tf.nn.softmax(tf.matmul(h_2,W) + b)
init = tf.initialize_all_variables()
# function and optimizers
cost_function = -tf.reduce_sum(Y * tf.log(y_))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function)
correct_prediction = tf.equal(tf.argmax(y_,1), tf.argmax(Y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# train loop
cost_history = np.empty(shape=[1],dtype=float)
y_true, y_pred = None, None
with tf.Session(config = config) as sess:
sess.run(init)
for epoch in range(training_epochs):
print(' ## Epoch n°', epoch+1 )
batch = batch_generator(BATCH_SIZE, tr_features, tr_labels)
acc_total = 0.0
for cpt, (train_features_batch, train_labels_batch) in enumerate(batch):
_,cost = sess.run([optimizer,cost_function],feed_dict={X:train_features_batch,Y:train_labels_batch})
cost_history = np.append(cost_history,cost)
correct_prediction = tf.equal(tf.argmax(y_,1), tf.argmax(Y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
acc = accuracy.eval(feed_dict={X:train_features_batch,Y:train_labels_batch})
acc_total = (acc_total*cpt + acc)/(cpt+1)
print('Train accuracy : ', acc_total, '[',str(cpt+1), '/',str(number_loop), ']' ,flush=True, end='\r')
clear_output()
print('Train accuracy : ', acc_total)
y_pred = sess.run(tf.argmax(y_,1),feed_dict={X: ts_features})
y_true = sess.run(tf.argmax(ts_labels,1))
print('Test accuracy: ', round(sess.run(accuracy, feed_dict={X: ts_features, Y: ts_labels}) , 3))
fig = plt.figure(figsize=(10,8))
plt.plot(cost_history)
plt.axis([0,training_epochs,0,np.max(cost_history)])
plt.show()
p,r,f,s = precision_recall_fscore_support(y_true, y_pred, average='micro')
print("F-Score:", round(f,3))
def batch_generator(batch_size, feat_path, labels):
n_sample = len(feat_path)
ite = math.ceil(n_sample/batch_size)
for i in range(0, ite):
if i == ite-1:
label = one_hot_encode(labels[-batch_size:])
feat = get_specgrams(feat_path[-batch_size:])
yield (feat, label)
else:
label = one_hot_encode(labels[i*batch_size:i*batch_size+batch_size])
feat = get_specgrams(feat_path[i*batch_size:i*batch_size+batch_size])
yield (feat, label)
def get_data(feat_path, labels):
feat = get_specgrams(feat_path)
label = one_hot_encode(labels)
return feat, label
def __main__():
print('## Load data and shuffle')
feat_path, labels = prepare_data('data_labelised2.txt')
idx = int(len(labels)*0.8)
print("## Create Model")
model(feat_path[0:idx], labels[0:idx], feat_path[idx+1:], labels[idx+1:])
with tf.device('/gpu:0'):
__main__()