ValueError: cannot reshape array of size 692224 into shape (1,3,416,416) - YOLOv5 CPU error - resize

I was trying to run my custom YOLOv5 model on CPU and I got this error.
This is the GitHub repository I used: https://github.com/Amelia0911/onnxruntime-for-yolov5
import onnxruntime
from models.utils import *
import time

IMAGE_SIZE = (416, 416)
CONF_TH = 0.3
NMS_TH = 0.45
CLASSES = 80

model = onnxruntime.InferenceSession("models_train/bestnone.onnx")
anchor_list = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
stride = [8, 16, 32]

def draw(img, boxinfo, dst, id):
    for *xyxy, conf, cls in boxinfo:
        label = '{}|{}'.format(int(cls), '%.2f' % conf)
        plot_one_box(xyxy, img, label=label, color=[0, 0, 255])
    cv2.imencode('.jpg', img)[1].tofile(dst)

def detect(image):
    img = cv2.resize(image, IMAGE_SIZE)
    img = img.transpose(2, 0, 1)
    dataset = (img, image)
    img = dataset[0].astype('float32')
    img_size = [dataset[0].shape[1], dataset[0].shape[2]]
    img /= 255.0
    img = img.reshape(1, 3, img_size[0], img_size[1])

    inputs = {model.get_inputs()[0].name: img}
    pred = torch.tensor(model.run(None, inputs)[0])

    anchor = torch.tensor(anchor_list).float().view(3, -1, 2)
    area = img_size[0] * img_size[1]
    size = [int(area / stride[0] ** 2), int(area / stride[1] ** 2), int(area / stride[2] ** 2)]
    feature = [[int(j / stride[i]) for j in img_size] for i in range(3)]

    y = []
    y.append(pred[:, :size[0] * 3, :])
    y.append(pred[:, size[0] * 3:size[0] * 3 + size[1] * 3, :])
    y.append(pred[:, size[0] * 3 + size[1] * 3:, :])

    grid = []
    for k, f in enumerate(feature):
        grid.append([[i, j] for j in range(f[0]) for i in range(f[1])])

    z = []
    for i in range(3):
        src = y[i]
        xy = src[..., 0:2] * 2. - 0.5
        wh = (src[..., 2:4] * 2) ** 2
        dst_xy = []
        dst_wh = []
        for j in range(3):
            dst_xy.append((xy[:, j * size[i]:(j + 1) * size[i], :] + torch.tensor(grid[i])) * stride[i])
            dst_wh.append(wh[:, j * size[i]:(j + 1) * size[i], :] * anchor[i][j])
        src[..., 0:2] = torch.from_numpy(np.concatenate((dst_xy[0], dst_xy[1], dst_xy[2]), axis=1))
        src[..., 2:4] = torch.from_numpy(np.concatenate((dst_wh[0], dst_wh[1], dst_wh[2]), axis=1))
        z.append(src.view(1, -1, CLASSES + 5))  # 85

    pred = torch.cat(z, 1)
    pred = nms(pred, CONF_TH, NMS_TH)
    for det in pred:
        if det is not None and len(det):
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], dataset[1].shape).round()
    if det == None:
        return np.array([])
    return det

if __name__ == '__main__':
    import time
    src = 'Temp-640x640.jpg'
    t1 = time.time()
    img = cv2.imdecode(np.fromfile(src, dtype=np.uint8), -1)
    print(IMAGE_SIZE)
    results = detect(img)
    t2 = time.time()
    print(results)
    print("onnxruntime time = ", t2 - t1)
    if results is not None and len(results):
        draw(img, results, 'dst3.jpg', str(id))
    print('Down!')
When I run this code I get the following error:
File "C:\Users\acer\.spyder-py3\metallic surface defect detection\3_onnx_cpu_detec.py", line 85, in <module>
results = detect(img)
File "C:\Users\acer\.spyder-py3\metallic surface defect detection\3_onnx_cpu_detec.py", line 30, in detect
img = img.reshape(1, 3, img_size[0], img_size[1])
ValueError: cannot reshape array of size 692224 into shape (1,3,416,416)
I think it is a color channel issue. I have tried to fix it, but it doesn't work. If someone knows how to fix it, please let me know. Thanks in advance.
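A likely cause: 692224 = 416 * 416 * 4, so the resized image has four channels rather than three. cv2.imdecode(..., -1) keeps an alpha channel if the source file has one, and the reshape to (1, 3, 416, 416) then fails. A minimal sketch of one possible workaround, assuming the extra channel really is alpha (the helper name ensure_three_channels is illustrative, not part of the original code):

import cv2
import numpy as np

def ensure_three_channels(image):
    # Drop a possible alpha channel so the array reshapes cleanly to (1, 3, 416, 416).
    if image.ndim == 3 and image.shape[2] == 4:
        image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)
    elif image.ndim == 2:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    return image

# usage before calling detect():
# img = ensure_three_channels(img)
# results = detect(img)

Alternatively, reading the file with cv2.imdecode(np.fromfile(src, dtype=np.uint8), cv2.IMREAD_COLOR) forces a 3-channel BGR image.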

Related

Set timer on detected object

I'm using YOLO to detect objects, but I want to set a timer for the detected objects. Can anyone help me?
I want to make the object detection run with a limited time for my project.
I have tried my best, but I don't have any idea how to do it.
Here is my code:
import cv2 as cv
import numpy as np

cap = cv.VideoCapture(0)
whT = 320
confThreshold = 0.1
nmsThreshold = 0.4

classesFile = "coco.names"
classNames = []
with open(classesFile, 'rt') as f:
    classNames = [line.strip() for line in f.readlines()]

modelConfiguration = "yolov4.cfg"
modelWeights = "yolov4.weights"
net = cv.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

def findObjects(outputs, img):
    hT, wT, cT = img.shape
    bbox = []
    classIds = []
    confs = []
    for output in outputs:
        for det in output:
            scores = det[5:]
            classId = np.argmax(scores)
            confidence = scores[classId]
            if confidence > confThreshold:
                w, h = int(det[2]*wT), int(det[3]*hT)
                x, y = int((det[0]*wT) - w/2), int((det[1]*hT) - h/2)
                bbox.append([x, y, w, h])
                classIds.append(classId)
                confs.append(float(confidence))
    indices = cv.dnn.NMSBoxes(bbox, confs, confThreshold, nmsThreshold)
    font = cv.FONT_HERSHEY_PLAIN
    for i in indices:
        label = str(classNames[classIds[i]])
        x, y, w, h = bbox[i]
        # print(x, y, w, h)
        cv.rectangle(img, (x, y), (x+w, y+h), (255, 0, 255), 2)
        cv.putText(img, label, (x, y + 30), font, 3, (0, 0, 0), 3)
        print("Jenis Mobil: " + label)
        # cv.putText(img, f'{classNames[classIds[i]].upper()} {int(confs[i]*100)}%', (x, y-10), cv.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)

while True:
    success, img = cap.read()
    blob = cv.dnn.blobFromImage(img, 1 / 255, (whT, whT), [0, 0, 0], 1, crop=False)
    net.setInput(blob)
    layersNames = net.getLayerNames()
    outputNames = [(layersNames[i - 1]) for i in net.getUnconnectedOutLayers()]
    outputs = net.forward(outputNames)
    findObjects(outputs, img)
    cv.imshow('Image', img)
    key = cv.waitKey(1)
    if key == 27:
        break

cap.release()
cv.destroyAllWindows()
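A minimal sketch of one interpretation, assuming "timer" means tracking how long each detected label has been on screen: record the time a label is first seen and compare it against a limit. The names TIME_LIMIT, first_seen, and update_timer are illustrative and not part of the code above; update_timer(label) would be called inside the drawing loop in findObjects.

import time

TIME_LIMIT = 10.0        # seconds a detected label is allowed to stay "active"
first_seen = {}          # label -> timestamp of its first detection

def update_timer(label):
    # Start a timer the first time a label is detected and report the elapsed time.
    now = time.time()
    first_seen.setdefault(label, now)
    elapsed = now - first_seen[label]
    if elapsed > TIME_LIMIT:
        print(label, "has been detected for more than", TIME_LIMIT, "seconds")
    return elapsed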

How to pass a continuous video stream to PyQt5 using QThread correctly for face recognition?

I want to pass a continuous video stream to a PyQt5 QLabel named label_cam to show it in the UI using QThread, but it keeps failing to show the video stream on the QLabel. The video stream is later meant to be used to recognize people.
I have been trying to connect the signal change_Pixmap from the VideoThread class to the set_image function for the label_cam object, but I guess the flow of the code or the variable assignment is wrong. Below is my code.
class FaceRecogScreen(QDialog):
    def __init__(self):
        super(FaceRecogScreen, self).__init__()
        uic.loadUi("face_recog.ui", self)
        self.update_date_time()
        self.pushButton_back.clicked.connect(self.back_to_main3)
        self.load_model()

    def load_model(self):
        self.prototxt = "deploy.prototxt.txt"
        self.model = "res10_300x300_ssd_iter_140000.caffemodel"
        print("[INFORMATION] Loading model....")
        self.net = cv2.dnn.readNetFromCaffe(self.prototxt, self.model)
        weight = "facenet_keras_weights.h5"
        self.model2 = load_model('FaceNetModel.h5')
        self.model2.load_weights(weight)

        self.collected_encodings = pickle.loads(open('face_encoding.pickle', "rb").read())
        infile = open('face_encoding', 'rb')
        data = pickle.load(infile)
        self.knownEncodings, self.knownNames = data['encodings'], data['names']
        self.knownEncodings = np.array(self.knownEncodings)
        self.knownNames = np.array(self.knownNames)
        self.clf = svm.SVC(gamma="scale", probability=True, tol=0.01)
        self.clf.fit(self.knownEncodings, self.knownNames)

        # self.label_cam = VideoLabel()
        self.thread = VideoThread(self)
        self.thread.change_Pixmap.connect(self.set_image)
        # call the run() function in VideoThread class
        self.thread.start()

        # self.thread.change_Pixmap.connect(self.label_cam.set_image)
        # # call the run() function in VideoThread class
        # self.thread.start()
        # layout = self.layout()
        # layout.addWidget(self.label_cam)
        # self.thread.run.start()

    def update_date_time(self):
        # Get the current date and time
        date_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        self.label_datetime.setText(date_time)
        # Update the date and time in the table

    def back_to_main3(self):
        pushButton_back = WelcomeScreen()
        widget.addWidget(pushButton_back)
        widget.setCurrentIndex(widget.currentIndex() + 1)

    def set_image(self, frame):
        self.setPixmap(QPixmap.fromImage(frame))

class VideoThread(QtCore.QThread):
    change_Pixmap = QtCore.pyqtSignal(QtGui.QImage)

    def run(self):
        cap = cv2.VideoCapture(1)
        while True:
            ret, frame = cap.read()
            if ret:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                h, w, ch = frame.shape
                bytesPerLine = ch * w
                convertToQtFormat = QtGui.QImage(frame.data, w, h, bytesPerLine, QtGui.QImage.Format_RGB888)
                p = convertToQtFormat.scaled(640, 480, QtCore.Qt.KeepAspectRatio)
                self.change_Pixmap.emit(p)

                (h, w) = frame.shape[:2]
                blob = cv2.dnn.blobFromImage(cv2.resize(
                    frame, (160, 160)), 1.0, (300, 300), (104, 177, 123))
                self.net.setInput(blob)
                detections = self.net.forward()

                self.frame = frame
                # self.frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                cv2.normalize(frame, None, 0, 1.0, cv2.NORM_MINMAX, dtype=cv2.CV_32F)
                pixels = np.expand_dims(frame, axis=0)
                encode = self.model2.predict(pixels)

                face_name = []
                for encoding in encode:
                    name = self.clf.predict([encoding])
                    face_name.extend(name)

                for i in range(0, detections.shape[2]):
                    confidence = detections[0, 0, i, 2]
                    if confidence < 0.5:
                        continue
                    box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
                    (startX, startY, endX, endY) = box.astype("int")
                    text = "{:.2f}%".format(confidence * 100)
                    y = startY - 10 if startY - 10 > 10 else startY * 10
                    if name == 'unknown':
                        cv2.rectangle(frame, (startX, y), (endX, endY), (0, 0, 255), 2)
                        cv2.putText(frame, name, (startX, startY),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
                    else:
                        cv2.rectangle(frame, (startX, y), (endX, endY), (0, 255, 0), 2)
                        cv2.putText(frame, name[0], (startX, startY),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
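One thing that stands out: set_image() calls self.setPixmap(...), but FaceRecogScreen is a QDialog and has no setPixmap method; the pixmap has to be set on the QLabel itself (label_cam in the question). A minimal sketch of that change, assuming label_cam is the QLabel loaded from face_recog.ui:

    def set_image(self, frame):
        # Display the frame on the QLabel from the .ui file, not on the dialog itself.
        self.label_cam.setPixmap(QPixmap.fromImage(frame))

Note also that self.net, self.model2, and self.clf are attributes of FaceRecogScreen, so referring to them as self.net inside VideoThread.run() will raise AttributeError unless they are passed to the thread (for example via its constructor).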

How to slice a TensorFlow tensor into multiple slices

The tensor has shape batch(3) * length(5) * dim(2):
tensor = tf.constant([[[1,1],[2,2],[3,3],[4,4],[5,5]],[[1,1],[2,2],[3,3],[4,4],[5,5]],[[1,1],[2,2],[3,3],[4,4],[5,5]]] )
I want to get more slices by length index [0,0], [0,1] ... [3,4], [4,4] according to the length-axis index [0,1,2,3,4]. The operation is like:
spans_length = 0
with tf.variable_scope("loss_span"):
    output = []
    for i in range(0, 1 + n_spans):
        for j in range(1, seq_length):
            if j + i < seq_length:
                res = tf.slice(output_layer_sequence, [0, j, 0], [-1, j + i - j + 1, -1])
                res = tf.reduce_sum(res, axis=1)
                output.append(res)
                # output = tf.convert_to_tensor(output)
                spans_length += 1
    output = tf.convert_to_tensor(output)
    vsp = tf.transpose(output, [1, 0, 2])  # batch, spans_length, hidden_size
    vsp = tf.reshape(vsp, [-1, hidden_size])  # batch * spans_length, hidden_size
    span_logits = tf.matmul(vsp, output_span_weight, transpose_b=True)  # output: [batch * spans_length, class_labels]
    span_logits = tf.nn.bias_add(span_logits, output_span_bias)  # output: [batch * spans_length, class_labels]
    span_matrix = tf.reshape(span_logits, [-1, spans_length, class_labels], name="span_matrix_val")  # [batch, spans_length, class_labels]
    label_span_logists = tf.one_hot(indices=label_span, depth=class_labels, on_value=1, off_value=0, axis=-1, dtype=tf.int32)
    label_span_logists = tf.cast(label_span_logists, tf.int64)
    span_loss = tf.nn.softmax_cross_entropy_with_logits(logits=span_matrix, labels=label_span_logists)
    span_loss = tf.reduce_mean(span_loss, name='loss_span')
When I do this operation, the model's training time becomes very long; how can I speed it up? Thanks.
This code works:
# tensor = tf.constant([[[1,1],[2,2],[3,3],[4,4],[5,5]],[[1,1],[2,2],[3,3],[4,4],[5,5]],[[1,1],[2,2],[3,3],[4,4],[5,5]]])
tensor = tf.random.uniform((3, 2000, 2))
length = tf.shape(tensor)[1].numpy()
output = []
for begins in range(length):
    for size in range(length - begins):
        res = tf.slice(tensor, [0, begins, 0], [-1, size + 1, -1])
        res = tf.reduce_sum(res)
        output.append(res)
output = tf.convert_to_tensor(output)
I tried to use tf.scan(), but I don't see any benefits:
output = tf.constant([], tf.int32)
for begins in range(length):
    t = tensor[:, begins:, :]
    t = tf.transpose(t, (1, 0, 2))
    t = tf.scan(lambda a, x: a + x, t)
    t = tf.transpose(t, (1, 0, 2))
    t = tf.reduce_sum(t, [0, 2])
    output = tf.concat([output, t], 0)
Edits:
Tried to apply reduce_sum() along the unused dimension [0, 2] in preprocessing:
tensor = tf.reduce_sum(tensor, [0, 2])
output = tf.constant([])
for begins in range(length):
    t = tensor[begins:]
    t = tf.scan(lambda a, x: a + x, t)
    output = tf.concat([output, t], 0)
Still don't see performance benefits.
for i in range(0, 50):
    for j in range(1, 200):
        if j + i < 200:
            res = tf.slice(output_layer_sequence, [0, j, 0], [-1, j + i - j + 1, -1])
            res = tf.reduce_sum(res, axis=1)
            output.append(res)
output = tf.convert_to_tensor(output)
When I do this operation, the training time is very long; how can I speed it up? Thanks.
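One common way to avoid the Python loop of tf.slice + tf.reduce_sum calls is a single cumulative sum, since the sum over tensor[:, b:e, :] equals csum[:, e, :] - csum[:, b, :]. The sketch below is illustrative only (names such as span_sums are not from the code above) and assumes TensorFlow 2 eager mode, like the tf.random.uniform example:

import tensorflow as tf

tensor = tf.random.uniform((3, 2000, 2))

# Exclusive cumulative sum along the length axis: csum[:, k, :] == sum(tensor[:, :k, :]).
csum = tf.cumsum(tensor, axis=1, exclusive=True)
# Append the total sum so that index "length" is also a valid end point.
csum = tf.concat([csum, csum[:, -1:, :] + tensor[:, -1:, :]], axis=1)

# The sum of tensor[:, b:e, :] is csum[:, e, :] - csum[:, b, :],
# so every span sum becomes two gathers and a subtraction instead of slice + reduce_sum.
begins = tf.constant([0, 0, 1])   # example span starts (inclusive)
ends = tf.constant([1, 3, 5])     # example span ends (exclusive)
span_sums = tf.gather(csum, ends, axis=1) - tf.gather(csum, begins, axis=1)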

AttributeError: 'numpy.float32' object has no attribute 'to_cpu'

Good day,
I'm developing a deep learning model for wireless signal detection. Below is the snippet of the function that computes the model accuracy and bit error rate (BER):
from chainer.datasets import TupleDataset
import numpy as np
from chainer import cuda
from chainer import function

def get_idp_acc(model, dataset_tuple, comp_ratio, profile=None, batchsize=128, gpu=-1):
    chainer.config.train = True
    xp = np if gpu < 0 else cuda.cupy
    x, indices, x_zf, HtH, Hty = dataset_tuple._datasets[0], dataset_tuple._datasets[1], dataset_tuple._datasets[2], dataset_tuple._datasets[3], dataset_tuple._datasets[4]
    accs = 0
    BERs = 0
    model.train = False
    for j in range(0, len(x), batchsize):
        x_batch = xp.array(x[j:j + batchsize])
        indices_batch = xp.array(indices[j:j + batchsize])
        x_zf_batch = xp.array(x_zf[j:j + batchsize])
        HtH_batch = xp.array(HtH[j:j + batchsize])
        Hty_batch = xp.array(Hty[j:j + batchsize])
        if profile == None:
            acc_data = model(x_batch, indices_batch, x_zf_batch, HtH_batch, Hty_batch, comp_ratio=comp_ratio,
                             ret_param='acc')
        else:
            acc_data = model(x_batch, indices_batch, x_zf_batch, HtH_batch, Hty_batch, comp_ratio=comp_ratio,
                             ret_param='acc', profile=profile)
        acc_data.to_cpu()
        acc = acc_data.data
        BER = 1.0 - acc
        accs += acc * len(x_batch)
        BERs += BER * len(x_batch)
    return (accs / len(x)) * 100.
When the code is run, I get the following error despite having imported all the required Chainer modules. I really need your help on this issue, as I have been stuck for nearly two months without making any headway in my project.
Traceback (most recent call last):
File "/Users/mac/Documents/idp_detnet/examples/run_mlp.py", line 14, in <module>
mlp.run(args)
File "/Users/mac/Documents/idp_detnet/examples/mlp.py", line 39, in run
acc_dict[name], BER_dict[name] = util.sweep_idp(model, test, comp_ratios, args)
File "/Users/mac/Documents/idp_detnet/examples/util.py", line 107, in sweep_idp
batchsize=args.batchsize, profile=profile))
File "/Users/mac/Documents/idp_detnet/examples/util.py", line 83, in get_idp_acc
acc_data.to_cpu()
AttributeError: 'numpy.float32' object has no attribute 'to_cpu'
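The traceback says acc_data is a plain numpy.float32, and NumPy scalars have no to_cpu method; to_cpu exists on chainer.Variable objects. In the model code below, accuracy() returns np.mean(...).astype(np.float32), so the 'acc' entry of the report is already a NumPy value. A minimal sketch of a guard in get_idp_acc, assuming acc_data may be either a Variable or a NumPy scalar:

        if hasattr(acc_data, 'to_cpu'):   # chainer.Variable (possibly on GPU)
            acc_data.to_cpu()
            acc = acc_data.data
        else:                             # already a plain NumPy value such as numpy.float32
            acc = float(acc_data)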
Below is the additional information providing codes for model definition:
K = 10
num_layers = 3*K

def lin_soft_sign(x, t):
    '''Linear soft sign activation function from the original paper Eq. (11)'''
    y = -1 + F.relu(x + t)/F.absolute(t) - F.relu(-t)/F.absolute(t)
    return y

def accuracy(x, y):
    '''Computes the fraction of elements for which x and y are equal'''
    return np.mean(np.equal(x, y)).astype(np.float32)

class MLP(chainer.Chain):
    def __init__(self, K, coeff_generator, profiles=None, z_dims=8*K, v_dims=2*K):
        super(MLP, self).__init__()
        if profiles == None:
            profiles = [(0, 10)]
        self.coeff_generator = coeff_generator
        self.z_dims = z_dims
        self.v_dims = v_dims
        self.K = K
        self.profiles = profiles
        self.profile = 0
        with self.init_scope():
            self.p0_l1 = IncompleteLinear(None, self.z_dims)
            self.p1_l1 = IncompleteLinear(None, self.z_dims)
            self.p2_l1 = IncompleteLinear(None, self.z_dims)
            self.p0_lv = IncompleteLinear(None, self.v_dims)
            self.p1_lv = IncompleteLinear(None, self.v_dims)
            self.p2_lv = IncompleteLinear(None, self.v_dims)
            self.p0_l3 = IncompleteLinear(None, self.K)
            self.p1_l3 = IncompleteLinear(None, self.K)
            self.p2_l3 = IncompleteLinear(None, self.K)

    def __call__(self, x, indices, x_zf, HtH, Hty, ret_param='loss', profile=None, comp_ratio=None):
        if profile == None:
            profile = self.profile
        # Form Zero-forcing detection
        err_rel = F.sum((x - x_zf)**2, axis=1)
        params = layer_profile(self.coeff_generator,
                               *self.profiles[profile], self.z_dims,
                               self.v_dims, comp_ratio)

        def detnet_layer(x_d, x_logit, v, z_dims, v_dims):
            HtH_x = np.matmul(HtH, np.expand_dims(x_d.data, axis=2).astype(np.float32))
            HtH_x = F.squeeze(HtH_x, axis=-1)
            # x_concat = np.concatenate([Hty, x, HtH_x, v], axis=1)
            x_concat = F.concat([Hty, x_d, HtH_x, v], axis=1)
            if profile == 0:
                z = F.relu(self.p0_l1(x_concat))
                v += self.p0_lv(z, *params)
                x_logit += self.p0_l3(z, *params)
                x = lin_soft_sign(x_logit, F.broadcast_to(np.ones(1).astype(np.float32), x_logit.shape))
            elif profile == 1:
                z = F.relu(self.p1_l1(x_concat))
                v += self.p1_lv(z, *params)
                x_logit += self.p1_l3(z, *params)
                x = lin_soft_sign(x_logit, F.broadcast_to(np.ones(1).astype(np.float32), x_logit.shape))
            elif profile == 2:
                z = F.relu(self.p2_l1(x_concat))
                v += self.p2_lv(z, *params)
                x_logit += self.p2_l3(z, *params)
                x = lin_soft_sign(x_logit, F.broadcast_to(np.ones(1).astype(np.float32), x_logit.shape))
            return x, x_logit, v

        x_k = np.zeros((Hty.shape[0], self.K), dtype=np.float32)
        x_k_logit = np.zeros((Hty.shape[0], self.K), dtype=np.float32)
        v = np.zeros((Hty.shape[0], self.v_dims), dtype=np.float32)
        loss = 0
        mod = sg.Modulator('BPSK', K)
        for k in range(1, num_layers + 1):
            x_k, x_k_logit, v = detnet_layer(x_k, x_k_logit, v, self.z_dims, self.v_dims)
            err = F.sum((x - x_k)**2, 1)
            loss += (np.log(k)).astype(np.float32) * F.mean(err/err_rel)
        report = {'loss': loss, 'acc': accuracy(mod.demodulate(x_k.data), indices)}
        reporter.report(report, self)
        return report[ret_param]

    def report_params(self):
        return ['validation/main/acc']

    def param_names(self):
        if len(self.profiles) > 1:
            return 'IDPDETNET_{}_{}_{}_p{}'.format(self.z_dims, self.v_dims, self.coeff_generator.__name__, len(self.profiles))
        return 'IDPDETNET_{}_{}_{}'.format(self.z_dims, self.v_dims, self.coeff_generator.__name__)
import os
import sys
sys.path.insert(0, os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..')))
import numpy as np
import visualize as vz
import idp.coeffs_generator as cg
from net import MLP
import util

K = 10
N = 4
v_dims = 2*K
z_dims = 8*K
SNR_dB_tmin = -4
SNR_dB_tmax = 24
SNR_dB_test = np.linspace(SNR_dB_tmin, SNR_dB_tmax, 8)
num_snr_test = len(SNR_dB_test)

def run(args):
    train, test = util.get_dataset(args.modeltype)
    names = ['all-one (standard)', 'linear']
    colors = [vz.colors.all_one_lg, vz.colors.linear_lg]
    models = [
        MLP.MLP(K, cg.uniform, z_dims=8*K, v_dims=2*K),
        MLP.MLP(K, cg.linear, z_dims=8*K, v_dims=2*K)
    ]
    comp_ratios = np.linspace(0.1, 1.0, 20)
    acc_dict = {}
    BER_dict = {}
    ratios_dict = {}
    for i in range(num_snr_test):
        for name, model in zip(names, models):
            util.load_or_train_model(model, train, test, args)
            acc_dict[name], BER_dict[name] = util.sweep_idp(model, test, comp_ratios, args)
            ratios_dict[name] = [100. * cr for cr in comp_ratios]

    filename = "IDPDETNET1_{}".format(args.modeltype)
    vz.plot(ratios_dict, acc_dict, names, filename, colors=colors,
            folder=args.figure_path, ext=args.ext,
            title='IDPDETNET (BPSK)',
            xlabel='IDP (%)',
            ylabel='Test Accuracy (%)', ylim=(0, 100))

    filename = "IDPDETNET2_{}".format(args.modeltype)
    vz.plot(ratios_dict, BER_dict, names, filename, colors=colors,
            folder=args.figure_path, ext=args.ext,
            title='IDPDETNET (BPSK)',
            xlabel='IDP (%)',
            ylabel='BER (bits/sec)')

    filename = "IDPDETNET3_{}".format(args.modeltype)
    vz.plot(num_snr_test, BER_dict, names, filename, colors=colors,
            folder=args.figure_path, ext=args.ext,
            title='IDPDETNET (BPSK)',
            xlabel='SNR (dB)',
            ylabel='BER (bits/sec)')

if __name__ == '__main__':
    args = util.default_parser('IDPDETNET Example').parse_args()
    run(args)
Hi Seiya Tokui. Thank you for your kind input. Here is the model definition based on the above code:
model = MLP.MLP(K, cg.uniform, z_dims = 8*K, v_dims = 2*K)
OR
model = MLP.MLP(K, cg.linear, z_dims = 8*K, v_dims = 2*K)
Hi #BloodyD. Thank you for your brilliant contributions. The model started training, but later returned the following error:
1 nan nan 0.50108 5.85448
Traceback (most recent call last):
File "run_mlp.py", line 14, in <module>
mlp.run(args)
File "/Users/mac/Documents/idp_detnet/examples/mlp.py", line 38, in run
util.load_or_train_model(model, train, test, args)
File "/Users/mac/Documents/idp_detnet/examples/util.py", line 204, in load_or_train_model
train_model(model, train, test, args)
File "/Users/mac/Documents/idp_detnet/examples/util.py", line 184, in train_model
return eval(fp.read().replace('\n', ''))
File "<string>", line 1, in <module>
NameError: name 'NaN' is not defined
The error occurs in the last line of the code snippet below:
name = model.param_names()
save_model(model, os.path.join(args.model_path, name))
chainer.config.train = False
with open(os.path.join(args.out, 'log'), 'r') as fp:
    return eval(fp.read().replace('\n', ''))
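The NameError comes from eval() choking on the nan entries that the training run wrote into the log file (the "1 nan nan ..." line above shows the loss already diverged). A sketch of one way to at least read the log without the NameError, assuming the log is the JSON file written by Chainer's LogReport, is to parse it with the json module, which accepts NaN tokens; the underlying nan loss itself still has to be addressed separately.

import json
import os

with open(os.path.join(args.out, 'log'), 'r') as fp:
    log = json.load(fp)   # json parses NaN tokens as float('nan') instead of raising NameError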

TensorFlow: train on mini-batch, fast then slow

I am a beginner in TensorFlow and I am trying to train a model using mini-batches. To do that I created a generator and iterate over it. The problem I encounter is that at the beginning of the epoch the training seems fast (many batches per second), but then it slows down (1 batch per second), so I am wondering where I went wrong in my code, but I cannot find the problem.
def prepare_data(filename):
    '''load file which give path and label for the data'''
    f = open(filename, 'r')
    data = [line.split() for line in f]
    feat = []
    label = []
    for l in data:
        feat.append(l[0])
        label.append(l[1])
    n_samples = len(feat)
    shuf = list(range(n_samples))
    random.shuffle(shuf)
    count = Counter(label)
    print(count)
    feature = [feat[i] for i in shuf]
    label = np.array(label, dtype=np.int)
    return feature, label[shuf]

def get_specgrams(paths, nsamples=16000):
    '''
    Given list of paths, return specgrams.
    '''
    # read the wav files
    wavs = [wavfile.read(x)[1] for x in paths]
    # zero pad the shorter samples and cut off the long ones.
    data = []
    for wav in wavs:
        if wav.size < 16000:
            d = np.pad(wav, (nsamples - wav.size, 0), mode='constant')
        else:
            d = wav[0:nsamples]
        data.append(d)
    # get the specgram
    # specgram = [signal.spectrogram(d, nperseg=256, noverlap=128)[2] for d in data]
    # specgram = [s.reshape(129, 124, -1) for s in specgram]
    return np.asarray(data)

def get_specgram(path, nsamples=16000):
    '''
    Given path, return specgrams.
    '''
    # read the wav files
    wav = wavfile.read(path)[1]
    # zero pad the shorter samples and cut off the long ones.
    if wav.size < 16000:
        d = np.pad(wav, (nsamples - wav.size, 0), mode='constant')
    else:
        d = wav[0:nsamples]
    # get the specgram
    # specgram = [signal.spectrogram(d, nperseg=256, noverlap=128)[2] for d in data]
    # specgram = [s.reshape(129, 124, -1) for s in specgram]
    return d

# multci classification binary labels
def one_hot_encode(labels, n_unique_labels=31):
    n_labels = len(labels)
    # print('number of unique labels:', n_unique_labels)
    one_hot_encode = np.zeros((n_labels, n_unique_labels))
    one_hot_encode[np.arange(n_labels), labels] = 1
    return np.array(one_hot_encode, dtype=np.int)

# create_path_file('train/audio/')

def model(tr_features, tr_labels, ts_features, ts_labels):
    # remove gpu device error
    config = tf.ConfigProto(allow_soft_placement=True)

    # parameters
    BATCH_SIZE = 4
    number_loop = math.ceil(len(tr_features)/BATCH_SIZE)
    training_epochs = 10
    n_dim = 16000
    n_classes = 31  # len(np.unique(ts_labels))
    n_hidden_units_one = 280
    n_hidden_units_two = 300
    sd = 1 / np.sqrt(n_dim)
    learning_rate = 0.1

    # get test data
    ts_features, ts_labels = get_data(ts_features, ts_labels)

    # Model
    X = tf.placeholder(tf.float32, [None, n_dim])
    Y = tf.placeholder(tf.float32, [None, n_classes])

    W_1 = tf.Variable(tf.random_normal([n_dim, n_hidden_units_one], mean=0, stddev=sd))
    b_1 = tf.Variable(tf.random_normal([n_hidden_units_one], mean=0, stddev=sd))
    h_1 = tf.nn.tanh(tf.matmul(X, W_1) + b_1)

    W_2 = tf.Variable(tf.random_normal([n_hidden_units_one, n_hidden_units_two], mean=0, stddev=sd))
    b_2 = tf.Variable(tf.random_normal([n_hidden_units_two], mean=0, stddev=sd))
    h_2 = tf.nn.sigmoid(tf.matmul(h_1, W_2) + b_2)

    W = tf.Variable(tf.random_normal([n_hidden_units_two, n_classes], mean=0, stddev=sd))
    b = tf.Variable(tf.random_normal([n_classes], mean=0, stddev=sd))
    y_ = tf.nn.softmax(tf.matmul(h_2, W) + b)

    init = tf.initialize_all_variables()

    # function and optimizers
    cost_function = -tf.reduce_sum(Y * tf.log(y_))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function)
    correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # train loop
    cost_history = np.empty(shape=[1], dtype=float)
    y_true, y_pred = None, None
    with tf.Session(config=config) as sess:
        sess.run(init)
        for epoch in range(training_epochs):
            print(' ## Epoch n°', epoch+1)
            batch = batch_generator(BATCH_SIZE, tr_features, tr_labels)
            acc_total = 0.0
            for cpt, (train_features_batch, train_labels_batch) in enumerate(batch):
                _, cost = sess.run([optimizer, cost_function], feed_dict={X: train_features_batch, Y: train_labels_batch})
                cost_history = np.append(cost_history, cost)
                correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(Y, 1))
                accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
                acc = accuracy.eval(feed_dict={X: train_features_batch, Y: train_labels_batch})
                acc_total = (acc_total*cpt + acc)/(cpt+1)
                print('Train accuracy : ', acc_total, '[', str(cpt+1), '/', str(number_loop), ']', flush=True, end='\r')
            clear_output()
            print('Train accuracy : ', acc_total)
            y_pred = sess.run(tf.argmax(y_, 1), feed_dict={X: ts_features})
            y_true = sess.run(tf.argmax(ts_labels, 1))
            print('Test accuracy: ', round(sess.run(accuracy, feed_dict={X: ts_features, Y: ts_labels}), 3))
        fig = plt.figure(figsize=(10, 8))
        plt.plot(cost_history)
        plt.axis([0, training_epochs, 0, np.max(cost_history)])
        plt.show()
        p, r, f, s = precision_recall_fscore_support(y_true, y_pred, average='micro')
        print("F-Score:", round(f, 3))

def batch_generator(batch_size, feat_path, labels):
    n_sample = len(feat_path)
    ite = math.ceil(n_sample/batch_size)
    for i in range(0, ite):
        if i == ite-1:
            label = one_hot_encode(labels[-batch_size:])
            feat = get_specgrams(feat_path[-batch_size:])
            yield (feat, label)
        else:
            label = one_hot_encode(labels[i*batch_size:i*batch_size+batch_size])
            feat = get_specgrams(feat_path[i*batch_size:i*batch_size+batch_size])
            yield (feat, label)

def get_data(feat_path, labels):
    feat = get_specgrams(feat_path)
    label = one_hot_encode(labels)
    return feat, label

def __main__():
    print('## Load data and shuffle')
    feat_path, labels = prepare_data('data_labelised2.txt')
    idx = int(len(labels)*0.8)
    print("## Create Model")
    model(feat_path[0:idx], labels[0:idx], feat_path[idx+1:], labels[idx+1:])

with tf.device('/gpu:0'):
    __main__()
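A likely cause of the slowdown, offered as a hedged reading of the code above: the batch loop calls tf.equal, tf.reduce_mean, and tf.argmax inside the loop, and in TF1 graph mode each such call adds new nodes to the graph, so every subsequent sess.run() has a larger graph to manage and gets slower as training proceeds; np.append() on cost_history also copies the whole array on every batch. A minimal sketch of the restructuring of the train loop inside model(), assuming the same graph-mode setup and the names (y_, Y, X, optimizer, cost_function, init, config, and so on) defined above:

    # Build every graph op once, before the session; the loop then only feeds data.
    correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    cost_history = []                      # a plain list avoids the per-batch np.append copies
    with tf.Session(config=config) as sess:
        sess.run(init)
        for epoch in range(training_epochs):
            for cpt, (xb, yb) in enumerate(batch_generator(BATCH_SIZE, tr_features, tr_labels)):
                # No new ops are created here; existing ones are only evaluated.
                _, cost, acc = sess.run([optimizer, cost_function, accuracy],
                                        feed_dict={X: xb, Y: yb})
                cost_history.append(cost)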