I was trying to run my yolov5 custom model on cpu and I got this error.
this is the github page I have used : https://github.com/Amelia0911/onnxruntime-for-yolov5
import onnxruntime
from models.utils import *
import time
IMAGE_SIZE = (416, 416)
CONF_TH = 0.3
NMS_TH = 0.45
CLASSES = 80
model = onnxruntime.InferenceSession("models_train/bestnone.onnx")
anchor_list = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
stride = [8, 16, 32]
def draw(img, boxinfo, dst, id):
for *xyxy, conf, cls in boxinfo:
label = '{}|{}'.format(int(cls), '%.2f' % conf)
plot_one_box(xyxy, img, label=label, color=[0, 0, 255])
cv2.imencode('.jpg', img)[1].tofile(dst)
def detect(image):
img = cv2.resize(image,IMAGE_SIZE)
img = img.transpose(2, 0, 1)
dataset = (img, image)
img = dataset[0].astype('float32')
img_size = [dataset[0].shape[1], dataset[0].shape[2]]
img /= 255.0
img = img.reshape(1, 3, img_size[0], img_size[1])
inputs = {model.get_inputs()[0].name: img}
pred = torch.tensor(model.run(None, inputs)[0])
anchor = torch.tensor(anchor_list).float().view(3, -1, 2)
area = img_size[0]*img_size[1]
size = [int(area/stride[0]**2), int(area/stride[1]**2), int(area/stride[2]**2)]
feature = [[int(j/stride[i]) for j in img_size] for i in range(3)]
y = []
y.append(pred[:, :size[0]*3, :])
y.append(pred[:, size[0]*3:size[0]*3+size[1]*3, :])
y.append(pred[:, size[0]*3+size[1]*3:, :])
grid = []
for k, f in enumerate(feature):
grid.append([[i, j] for j in range(f[0]) for i in range(f[1])])
z = []
for i in range(3):
src = y[i]
xy = src[..., 0:2] * 2. - 0.5
wh = (src[..., 2:4] * 2) ** 2
dst_xy = []
dst_wh = []
for j in range(3):
dst_xy.append((xy[:, j*size[i]:(j+1)*size[i], :] + torch.tensor(grid[i])) * stride[i])
dst_wh.append(wh[:, j*size[i]:(j+1)*size[i], :] * anchor[i][j])
src[..., 0:2] = torch.from_numpy(np.concatenate((dst_xy[0], dst_xy[1], dst_xy[2]), axis=1))
src[..., 2:4] = torch.from_numpy(np.concatenate((dst_wh[0], dst_wh[1], dst_wh[2]), axis=1))
z.append(src.view(1, -1, CLASSES+5)) #85
pred = torch.cat(z, 1)
pred = nms(pred, CONF_TH, NMS_TH)
for det in pred:
if det is not None and len(det):
det[:, :4] = scale_coords(img.shape[2:], det[:, :4], dataset[1].shape).round()
if det == None:
return np.array([])
return det
if __name__ == '__main__':
import time
src = 'Temp-640x640.jpg'
t1 = time.time()
img = cv2.imdecode(np.fromfile(src, dtype=np.uint8), -1)
print(IMAGE_SIZE)
results = detect(img)
t2 = time.time()
print(results)
print("onnxruntime time = ", t2 - t1)
if results is not None and len(results):
draw(img, results, 'dst3.jpg', str(id))
print('Down!')
when I run this code I got the following error:
File "C:\Users\acer\.spyder-py3\metallic surface defect detection\3_onnx_cpu_detec.py", line 85, in <module>
results = detect(img)
File "C:\Users\acer\.spyder-py3\metallic surface defect detection\3_onnx_cpu_detec.py", line 30, in detect
img = img.reshape(1, 3, img_size[0], img_size[1])
ValueError: cannot reshape array of size 692224 into shape (1,3,416,416)
I think it is a color channel issue. I have tried to fix it .But it doesn't work .If someone know how to fix it please inform me.Thanks in advance
Related
i'm using yolo to detect object but i want to set timer for the detected object, can anyone help me?
so i want to make the object detecting with limited time for my projcet
i'm try my best but i don't have any idea how to do it
here is my code:
import cv2 as cv
import numpy as np
cap = cv.VideoCapture(0)
whT = 320
confThreshold = 0.1
nmsThreshold = 0.4
classesFile = "coco.names"
classNames = []
with open(classesFile, 'rt') as f:
classNames = [line.strip() for line in f.readlines()]
modelConfiguration = "yolov4.cfg"
modelWeights = "yolov4.weights"
net = cv.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)
def findObjects(outputs,img):
hT, wT, cT = img.shape
bbox = []
classIds = []
confs = []
for output in outputs:
for det in output:
scores = det[5:]
classId = np.argmax(scores)
confidence = scores[classId]
if confidence > confThreshold:
w,h = int(det[2]*wT) , int(det[3]*hT)
x,y = int((det[0]*wT)-w/2) , int((det[1]*hT)-h/2)
bbox.append([x,y,w,h])
classIds.append(classId)
confs.append(float(confidence))
indices = cv.dnn.NMSBoxes(bbox, confs, confThreshold, nmsThreshold)
font = cv.FONT_HERSHEY_PLAIN
for i in indices:
label = str(classNames[classIds[i]])
x, y, w, h = bbox[i]
#print(x,y,w,h)
cv.rectangle(img, (x, y), (x+w,y+h), (255, 0 , 255), 2)
cv.putText(img, label, (x, y + 30), font, 3, (0,0,0), 3)
print("Jenis Mobil: " + label)
#cv.putText(img,f'{classNames[classIds[i]].upper()} {int(confs[i]*100)}%', (x, y-10), cv.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)
while True:
success, img = cap.read()
blob = cv.dnn.blobFromImage(img, 1 / 255, (whT, whT), [0, 0, 0], 1, crop=False)
net.setInput(blob)
layersNames = net.getLayerNames()
outputNames = [(layersNames[i - 1]) for i in net.getUnconnectedOutLayers()]
outputs = net.forward(outputNames)
findObjects(outputs,img)
cv.imshow('Image', img)
key = cv.waitKey(1)
if key == 27:
break
cap.release()
cv.destroyAllWindows()
I want to pass continuous video stream to pyqt5 qlabel named label_cam to show up in ui using QThread but it keep failed showing the video stream on the qlabel. The video stream is later aimed to recognize people out.
I have been trying to connect the signal "change_Pixmap" from the VideoThread class to the "set_image" function in the "label_cam" object but i guess the flow of code or variable assign is wrong. Below is my code.
class FaceRecogScreen(QDialog):
def init(self):
super(FaceRecogScreen, self).init()
uic.loadUi("face_recog.ui", self)
self.update_date_time()
self.pushButton_back.clicked.connect(self.back_to_main3)
self.load_model()
def load_model(self):
self.prototxt = "deploy.prototxt.txt"
self.model = "res10_300x300_ssd_iter_140000.caffemodel"
print("[INFORMATION] Loading model....")
self.net = cv2.dnn.readNetFromCaffe(self.prototxt, self.model)
weight = "facenet_keras_weights.h5"
self.model2 = load_model('FaceNetModel.h5')
self.model2.load_weights(weight)
self.collected_encodings = pickle.loads(open('face_encoding.pickle', "rb").read())
infile = open('face_encoding', 'rb')
data = pickle.load(infile)
self.knownEncodings, self.knownNames = data['encodings'], data['names']
self.knownEncodings = np.array(self.knownEncodings)
self.knownNames = np.array(self.knownNames)
self.clf = svm.SVC(gamma="scale", probability=True, tol=0.01)
self.clf.fit(self.knownEncodings, self.knownNames)
# self.label_cam= VideoLabel()
self.thread = VideoThread(self)
self.thread.change_Pixmap.connect(self.set_image)
# call the run() function in VideoThread class
self.thread.start()
# self.thread.change_Pixmap.connect(self.label_cam.set_image)
# # call the run() function in VideoThread class
# self.thread.start()
# layout = self.layout()
# layout.addWidget(self.label_cam)
# self.thread.run.start()
def update_date_time(self):
# Get the current date and time
date_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
self.label_datetime.setText(date_time)
# Update the date and time in the table
def back_to_main3(self):
pushButton_back = WelcomeScreen()
widget.addWidget(pushButton_back)
widget.setCurrentIndex(widget.currentIndex()+1)
def set_image(self, frame):
self.setPixmap(QPixmap.fromImage(frame))
class VideoThread(QtCore.QThread):
change_Pixmap = QtCore.pyqtSignal(QtGui.QImage)
def run(self):
cap = cv2.VideoCapture(1)
while True:
ret, frame = cap.read()
if ret:
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
h, w, ch = frame.shape
bytesPerLine = ch * w
convertToQtFormat = QtGui.QImage(frame.data, w, h, bytesPerLine, QtGui.QImage.Format_RGB888)
p = convertToQtFormat.scaled(640, 480, QtCore.Qt.KeepAspectRatio)
self.change_Pixmap.emit(p)
(h, w) = frame.shape[:2]
blob = cv2.dnn.blobFromImage(cv2.resize(
frame, (160, 160)), 1.0, (300, 300), (104, 177, 123))
self.net.setInput(blob)
detections = self.net.forward()
self.frame = frame
# self.frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
cv2.normalize(frame, None, 0, 1.0, cv2.NORM_MINMAX, dtype=cv2.CV_32F)
pixels = np.expand_dims(frame, axis=0)
encode = self.model2.predict(pixels)
face_name = []
for encoding in encode:
name = self.clf.predict([encoding])
face_name.extend(name)
for i in range(0, detections.shape[2]):
confidence = detections[0, 0, i, 2]
if confidence < 0.5:
continue
box = detections[0, 0, i, 3:7]*np.array([w, h, w, h])
(startX, startY, endX, endY) = box.astype("int")
text = "{:.2f}%".format(confidence*100)
y = startY - 10 if startY - 10 > 10 else startY*10
if name == 'unknown':
cv2.rectangle(frame, (startX, y), (endX, endY), (0, 0, 255), 2)
cv2.putText(frame, name, (startX, startY),
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
else:
cv2.rectangle(frame, (startX, y), (endX, endY), (0, 255, 0), 2)
cv2.putText(frame, name[0], (startX, startY),
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
the tensor is:
batch(3) * length(5) * dim(2)
tensor = tf.constant([[[1,1],[2,2],[3,3],[4,4],[5,5]],[[1,1],[2,2],[3,3],[4,4],[5,5]],[[1,1],[2,2],[3,3],[4,4],[5,5]]] )
and i want get more slices by length_index [0,0],[0,1] ... [3,4],[4,4] according to length_axis_index[0,1,2,3,4],the operation like
spans_length=0
with tf.variable_scope("loss_span"):
output=[]
for i in range(0,1+n_spans):
for j in range(1,seq_length):
if j + i < seq_length:
res = tf.slice(output_layer_sequence, [0, j, 0], [-1, j+i-j+1, -1])
res = tf.reduce_sum(res,axis=1)
output.append(res)
# output = tf.convert_to_tensor(output)
spans_length+=1
output = tf.convert_to_tensor(output)
vsp = tf.transpose(output, [1,0,2])#batch , spans_length,hidden_size
vsp = tf.reshape(vsp,[-1,hidden_size])#batch * span_length,hidden_size
span_logits = tf.matmul(vsp, output_span_weight, transpose_b=True) # output:[batch * spans_length,class_labels]
span_logits = tf.nn.bias_add(span_logits, output_span_bias) # output:[batch * spans_length,class_labels]
span_matrix = tf.reshape(span_logits,[-1,spans_length,class_labels],name="span_matrix_val")#[batch , spans_length,class_labels]
label_span_logists = tf.one_hot(indices=label_span,depth=class_labels, on_value=1, off_value=0, axis=-1, dtype=tf.int32)
label_span_logists=tf.cast(label_span_logists,tf.int64)
span_loss = tf.nn.softmax_cross_entropy_with_logits(logits=span_matrix, labels=label_span_logists)
span_loss = tf.reduce_mean(span_loss, name='loss_span')
when i doing such operation, training model 's time is very long;how to speed it.thanks
This code works:
# tensor = tf.constant([[[1,1],[2,2],[3,3],[4,4],[5,5]],[[1,1],[2,2],[3,3],[4,4],[5,5]],[[1,1],[2,2],[3,3],[4,4],[5,5]]] )
tensor = tf.random.uniform((3, 2000, 2))
length = tf.shape(tensor)[1].numpy()
output = []
for begins in range(length):
for size in range(length - begins):
res = tf.slice(tensor, [0, begins, 0], [-1, size + 1, -1])
res = tf.reduce_sum(res)
output.append(res)
output = tf.convert_to_tensor(output)
I tried to use tf.scan(), but I don't see any benefits:
output = tf.constant([], tf.int32)
for begins in range(length):
t = tensor[:, begins:, :]
t = tf.transpose(t, (1, 0, 2))
t = tf.scan(lambda a, x: a + x, t)
t = tf.transpose(t, (1, 0, 2))
t = tf.reduce_sum(t, [0, 2])
output = tf.concat([output, t], 0)
Edits:
Tried to apply reduce_sum() along the unused dimension [0, 2] in preprocessing:
tensor = tf.reduce_sum(tensor, [0, 2])
output = tf.constant([])
for begins in range(length):
t = tensor[begins:]
t = tf.scan(lambda a, x: a + x, t)
output = tf.concat([output, t], 0)
Still don't see performance benefits.
for i in range(0,50):
for j in range(1,200):
if j + i < 200:
res = tf.slice(output_layer_sequence, [0, j, 0], [-1, j+i-j+1, -1])
res = tf.reduce_sum(res,axis=1)
output.append(res)
output = tf.convert_to_tensor(output)
when i doing such operation, training time is very long;how to speed it.thanks
Good day,
I'm developing a deep learning model for wireless signal detection. Below is the snippet of the function that computes the model accuracy and bit error rate (BER):
from chainer.datasets import TupleDataset
import numpy as np
from chainer import cuda
from chainer import function
def get_idp_acc(model, dataset_tuple, comp_ratio, profile = None, batchsize = 128, gpu = -1):
chainer.config.train = True
xp = np if gpu < 0 else cuda.cupy
x, indices, x_zf, HtH, Hty = dataset_tuple._datasets[0], dataset_tuple._datasets[1], dataset_tuple._datasets[2], dataset_tuple._datasets[3], dataset_tuple._datasets[4]
accs = 0
BERs = 0
model.train = False
for j in range(0, len(x), batchsize):
x_batch = xp.array(x[j:j + batchsize])
indices_batch = xp.array(indices[j:j + batchsize])
x_zf_batch = xp.array(x_zf[j:j + batchsize])
HtH_batch = xp.array(HtH[j:j + batchsize])
Hty_batch = xp.array(Hty[j:j + batchsize])
if profile == None:
acc_data = model(x_batch, indices_batch, x_zf_batch, HtH_batch, Hty_batch, comp_ratio = comp_ratio,
ret_param = 'acc')
else:
acc_data = model(x_batch, indices_batch, x_zf_batch, HtH_batch, Hty_batch, comp_ratio = comp_ratio,
ret_param = 'acc', profile = profile)
acc_data.to_cpu()
acc = acc_data.data
BER = 1.0 - acc
accs += acc * len(x_batch)
BERs += BER * len(x_batch)
return (accs / len(x)) * 100.
When the code is run, I get the following error below despite having imported all the required chainer modules. I really need your help on this issue as I'm stuck for nearly two months without making any headways in my project.
Traceback (most recent call last):
File "/Users/mac/Documents/idp_detnet/examples/run_mlp.py", line 14, in <module>
mlp.run(args)
File "/Users/mac/Documents/idp_detnet/examples/mlp.py", line 39, in run
acc_dict[name], BER_dict[name] = util.sweep_idp(model, test, comp_ratios, args)
File "/Users/mac/Documents/idp_detnet/examples/util.py", line 107, in sweep_idp
batchsize=args.batchsize, profile=profile))
File "/Users/mac/Documents/idp_detnet/examples/util.py", line 83, in get_idp_acc
acc_data.to_cpu()
AttributeError: 'numpy.float32' object has no attribute 'to_cpu'
Below is the additional information providing codes for model definition:
K = 10
num_layers = 3*K
def lin_soft_sign(x, t):
'''Linear soft sign activation function from the original paper Eq. (11)'''
y = -1 + F.relu(x + t)/ F.absolute(t) - F.relu(- t)/ F.absolute(t)
return y
def accuracy(x, y):
'''Computes the fraction of elements for which x and y are equal'''
return np.mean(np.equal(x, y)).astype(np.float32)
class MLP(chainer.Chain):
def __init__(self, K, coeff_generator, profiles = None, z_dims = 8*K, v_dims = 2*K):
super(MLP, self).__init__()
if profiles == None:
profiles = [(0, 10)]
self.coeff_generator = coeff_generator
self.z_dims = z_dims
self.v_dims = v_dims
self.K = K
self.profiles = profiles
self.profile = 0
with self.init_scope():
self.p0_l1 = IncompleteLinear(None, self.z_dims)
self.p1_l1 = IncompleteLinear(None, self.z_dims)
self.p2_l1 = IncompleteLinear(None, self.z_dims)
self.p0_lv = IncompleteLinear(None, self.v_dims)
self.p1_lv = IncompleteLinear(None, self.v_dims)
self.p2_lv = IncompleteLinear(None, self.v_dims)
self.p0_l3 = IncompleteLinear(None, self.K)
self.p1_l3 = IncompleteLinear(None, self.K)
self.p2_l3 = IncompleteLinear(None, self.K)
def __call__(self, x, indices, x_zf, HtH, Hty, ret_param = 'loss', profile = None, comp_ratio = None):
if profile == None:
profile = self.profile
# Form Zero-forcing detection
err_rel = F.sum((x - x_zf)**2, axis = 1)
params = layer_profile(self.coeff_generator,
*self.profiles[profile], self.z_dims,
self.v_dims, comp_ratio)
def detnet_layer(x_d, x_logit, v, z_dims, v_dims):
HtH_x = np.matmul(HtH, np.expand_dims(x_d.data, axis = 2).astype(np.float32))
HtH_x = F.squeeze(HtH_x, axis = -1)
#x_concat = np.concatenate([Hty, x, HtH_x, v], axis=1)
x_concat = F.concat([Hty, x_d, HtH_x, v], axis = 1)
if profile == 0:
z = F.relu(self.p0_l1(x_concat))
v += self.p0_lv(z, *params)
x_logit += self.p0_l3(z, *params)
x = lin_soft_sign(x_logit, F.broadcast_to(np.ones(1).astype(np.float32), x_logit.shape))
elif profile == 1:
z = F.relu(self.p1_l1(x_concat))
v += self.p1_lv(z, *params)
x_logit += self.p1_l3(z, *params)
x = lin_soft_sign(x_logit, F.broadcast_to(np.ones(1).astype(np.float32), x_logit.shape))
elif profile == 2:
z = F.relu(self.p2_l1(x_concat))
v += self.p2_lv(z, *params)
x_logit += self.p2_l3(z, *params)
x = lin_soft_sign(x_logit, F.broadcast_to(np.ones(1).astype(np.float32), x_logit.shape))
return x, x_logit, v
x_k = np.zeros((Hty.shape[0], self.K), dtype = np.float32)
x_k_logit = np.zeros((Hty.shape[0], self.K), dtype = np.float32)
v = np.zeros((Hty.shape[0], self.v_dims), dtype = np.float32)
loss = 0
mod = sg.Modulator('BPSK', K)
for k in range(1, num_layers + 1):
x_k, x_k_logit, v = detnet_layer(x_k, x_k_logit, v, self.z_dims, self.v_dims)
err = F.sum((x - x_k)**2, 1)
loss += (np.log(k)).astype(np.float32) * F.mean(err/err_rel)
report = {'loss': loss, 'acc': accuracy(mod.demodulate(x_k.data), indices)}
reporter.report(report, self)
return report[ret_param]
def report_params(self):
return ['validation/main/acc']
def param_names(self):
if len(self.profiles) > 1:
return 'IDPDETNET_{}_{}_{}_p{}'.format(self.z_dims, self.v_dims, self.coeff_generator.__name__, len(self.profiles))
return 'IDPDETNET_{}_{}_{}'.format(self.z_dims, self.v_dims, self.coeff_generator.__name__)
import os
import sys
sys.path.insert(0, os.path.abspath(
os.path.join(os.path.dirname(__file__), '..')))
import numpy as np
import visualize as vz
import idp.coeffs_generator as cg
from net import MLP
import util
K = 10
N = 4
v_dims = 2*K
z_dims = 8*K
SNR_dB_tmin = -4
SNR_dB_tmax = 24
SNR_dB_test = np.linspace(SNR_dB_tmin, SNR_dB_tmax, 8)
num_snr_test = len(SNR_dB_test)
def run(args):
train, test = util.get_dataset(args.modeltype)
names = ['all-one (standard)', 'linear']
colors = [vz.colors.all_one_lg, vz.colors.linear_lg]
models = [
MLP.MLP(K, cg.uniform, z_dims = 8*K, v_dims = 2*K),
MLP.MLP(K, cg.linear, z_dims = 8*K, v_dims = 2*K)
]
comp_ratios = np.linspace(0.1, 1.0, 20)
acc_dict = {}
BER_dict = {}
ratios_dict = {}
for i in range(num_snr_test):
for name, model in zip(names, models):
util.load_or_train_model(model, train, test, args)
acc_dict[name], BER_dict[name] = util.sweep_idp(model, test, comp_ratios, args)
ratios_dict[name] = [100. * cr for cr in comp_ratios]
filename = "IDPDETNET1_{}".format(args.modeltype)
vz.plot(ratios_dict, acc_dict, names, filename, colors = colors,
folder = args.figure_path, ext=args.ext,
title = 'IDPDETNET (BPSK)',
xlabel = 'IDP (%)',
ylabel = 'Test Accuracy (%)', ylim = (0, 100))
filename = "IDPDETNET2_{}".format(args.modeltype)
vz.plot(ratios_dict, BER_dict, names, filename, colors = colors,
folder=args.figure_path, ext=args.ext,
title='IDPDETNET (BPSK)',
xlabel='IDP (%)',
ylabel='BER (bits/sec)')
filename = "IDPDETNET3_{}".format(args.modeltype)
vz.plot(num_snr_test, BER_dict, names, filename, colors = colors,
folder = args.figure_path, ext = args.ext,
title = 'IDPDETNET (BPSK)',
xlabel = 'SNR (dB)',
ylabel = ' BER (bits/sec)')
if __name__ == '__main__':
args = util.default_parser('IDPDETNET Example').parse_args()
run(args)
Hi Seiya Tokui. Thank you for your kind input. Here is the model definition based on the above code:
model = MLP.MLP(K, cg.uniform, z_dims = 8*K, v_dims = 2*K)
OR
model = MLP.MLP(K, cg.linear, z_dims = 8*K, v_dims = 2*K)
Hi #BloodyD. Thank for your brilliant contributions. The model started training, but then later returned the following error:
1 nan nan 0.50108 5.85448
Traceback (most recent call last):
File "run_mlp.py", line 14, in <module>
mlp.run(args)
File "/Users/mac/Documents/idp_detnet/examples/mlp.py", line 38, in run
util.load_or_train_model(model, train, test, args)
File "/Users/mac/Documents/idp_detnet/examples/util.py", line 204, in load_or_train_model
train_model(model, train, test, args)
File "/Users/mac/Documents/idp_detnet/examples/util.py", line 184, in train_model
return eval(fp.read().replace('\n', ''))
File "<string>", line 1, in <module>
NameError: name 'NaN' is not defined
The error occurs in the last line of this snippet code below:
name = model.param_names()
save_model(model, os.path.join(args.model_path, name))
chainer.config.train = False
with open(os.path.join(args.out, 'log'), 'r') as fp:
return eval(fp.read().replace('\n', ''))
I am a beginner in tensorflow and I am trying to train a model using "mini batch". To do that I created a generator and iterate it. The problem I encounter is that, at the beginning of the epoch, the train seems fast (many batch per seconds) then the train slow down (1 batch per second) so I am wondering where I am wrong in my code but I do not find the problem.
def prepare_data(filename):
'''load file which give path and label for the data'''
f = open(filename, 'r')
data = [line.split() for line in f]
feat =[]
label=[]
for l in data:
feat.append(l[0])
label.append(l[1])
n_samples = len(feat)
shuf = list(range(n_samples))
random.shuffle(shuf)
count = Counter(label)
print(count)
feature = [feat[i] for i in shuf]
label = np.array(label, dtype=np.int)
return feature, label[shuf]
def get_specgrams(paths, nsamples=16000):
'''
Given list of paths, return specgrams.
'''
# read the wav files
wavs = [wavfile.read(x)[1] for x in paths]
# zero pad the shorter samples and cut off the long ones.
data = []
for wav in wavs:
if wav.size < 16000:
d = np.pad(wav, (nsamples - wav.size, 0), mode='constant')
else:
d = wav[0:nsamples]
data.append(d)
# get the specgram
#specgram = [signal.spectrogram(d, nperseg=256, noverlap=128)[2] for d in data]
#specgram = [s.reshape(129, 124, -1) for s in specgram]
return np.asarray(data)
def get_specgram(path, nsamples=16000):
'''
Given path, return specgrams.
'''
# read the wav files
wav = wavfile.read(path)[1]
# zero pad the shorter samples and cut off the long ones.
if wav.size < 16000:
d = np.pad(wav, (nsamples - wav.size, 0), mode='constant')
else:
d = wav[0:nsamples]
# get the specgram
#specgram = [signal.spectrogram(d, nperseg=256, noverlap=128)[2] for d in data]
#specgram = [s.reshape(129, 124, -1) for s in specgram]
return d
# multci classification binary labels
def one_hot_encode(labels, n_unique_labels=31):
n_labels = len(labels)
#print('number of unique labels:', n_unique_labels)
one_hot_encode = np.zeros((n_labels,n_unique_labels))
one_hot_encode[np.arange(n_labels), labels] = 1
return np.array(one_hot_encode, dtype=np.int)
#create_path_file('train/audio/')
def model(tr_features, tr_labels, ts_features, ts_labels):
# remove gpu device error
config = tf.ConfigProto(allow_soft_placement = True)
# parameters
BATCH_SIZE = 4
number_loop = math.ceil(len(tr_features)/BATCH_SIZE)
training_epochs = 10
n_dim = 16000
n_classes = 31 #len(np.unique(ts_labels))
n_hidden_units_one = 280
n_hidden_units_two = 300
sd = 1 / np.sqrt(n_dim)
learning_rate = 0.1
# get test data
ts_features, ts_labels = get_data(ts_features, ts_labels)
# Model
X = tf.placeholder(tf.float32,[None,n_dim])
Y = tf.placeholder(tf.float32,[None,n_classes])
W_1 = tf.Variable(tf.random_normal([n_dim,n_hidden_units_one], mean = 0, stddev=sd))
b_1 = tf.Variable(tf.random_normal([n_hidden_units_one], mean = 0, stddev=sd))
h_1 = tf.nn.tanh(tf.matmul(X,W_1) + b_1)
W_2 = tf.Variable(tf.random_normal([n_hidden_units_one,n_hidden_units_two], mean = 0, stddev=sd))
b_2 = tf.Variable(tf.random_normal([n_hidden_units_two], mean = 0, stddev=sd))
h_2 = tf.nn.sigmoid(tf.matmul(h_1,W_2) + b_2)
W = tf.Variable(tf.random_normal([n_hidden_units_two,n_classes], mean = 0, stddev=sd))
b = tf.Variable(tf.random_normal([n_classes], mean = 0, stddev=sd))
y_ = tf.nn.softmax(tf.matmul(h_2,W) + b)
init = tf.initialize_all_variables()
# function and optimizers
cost_function = -tf.reduce_sum(Y * tf.log(y_))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function)
correct_prediction = tf.equal(tf.argmax(y_,1), tf.argmax(Y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# train loop
cost_history = np.empty(shape=[1],dtype=float)
y_true, y_pred = None, None
with tf.Session(config = config) as sess:
sess.run(init)
for epoch in range(training_epochs):
print(' ## Epoch n°', epoch+1 )
batch = batch_generator(BATCH_SIZE, tr_features, tr_labels)
acc_total = 0.0
for cpt, (train_features_batch, train_labels_batch) in enumerate(batch):
_,cost = sess.run([optimizer,cost_function],feed_dict={X:train_features_batch,Y:train_labels_batch})
cost_history = np.append(cost_history,cost)
correct_prediction = tf.equal(tf.argmax(y_,1), tf.argmax(Y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
acc = accuracy.eval(feed_dict={X:train_features_batch,Y:train_labels_batch})
acc_total = (acc_total*cpt + acc)/(cpt+1)
print('Train accuracy : ', acc_total, '[',str(cpt+1), '/',str(number_loop), ']' ,flush=True, end='\r')
clear_output()
print('Train accuracy : ', acc_total)
y_pred = sess.run(tf.argmax(y_,1),feed_dict={X: ts_features})
y_true = sess.run(tf.argmax(ts_labels,1))
print('Test accuracy: ', round(sess.run(accuracy, feed_dict={X: ts_features, Y: ts_labels}) , 3))
fig = plt.figure(figsize=(10,8))
plt.plot(cost_history)
plt.axis([0,training_epochs,0,np.max(cost_history)])
plt.show()
p,r,f,s = precision_recall_fscore_support(y_true, y_pred, average='micro')
print("F-Score:", round(f,3))
def batch_generator(batch_size, feat_path, labels):
n_sample = len(feat_path)
ite = math.ceil(n_sample/batch_size)
for i in range(0, ite):
if i == ite-1:
label = one_hot_encode(labels[-batch_size:])
feat = get_specgrams(feat_path[-batch_size:])
yield (feat, label)
else:
label = one_hot_encode(labels[i*batch_size:i*batch_size+batch_size])
feat = get_specgrams(feat_path[i*batch_size:i*batch_size+batch_size])
yield (feat, label)
def get_data(feat_path, labels):
feat = get_specgrams(feat_path)
label = one_hot_encode(labels)
return feat, label
def __main__():
print('## Load data and shuffle')
feat_path, labels = prepare_data('data_labelised2.txt')
idx = int(len(labels)*0.8)
print("## Create Model")
model(feat_path[0:idx], labels[0:idx], feat_path[idx+1:], labels[idx+1:])
with tf.device('/gpu:0'):
__main__()