TypeError: 'NormalizedLandmarkList' object is not iterable mediapipe - mediapipe

I need some help with this code.
The error is "TypeError: 'NormalizedLandmarkList' object is not iterable".
It is raised on the 19th line of the code.
import cv2
import mediapipe as mp
import math
# Drawing helpers plus the Holistic solution. Despite the "hands" names, the
# model created here is mp.solutions.holistic.Holistic.
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.holistic
hands = mp_hands.Holistic(static_image_mode=True, )
cap = cv2.VideoCapture(0)
# NOTE(review): the paste lost its indentation; the nesting below is a
# reconstruction -- confirm against the original script.
while True:
    _, frame = cap.read()
    # Mirror the frame horizontally, then convert BGR -> RGB for processing
    # and back to BGR for drawing and display.
    frame = cv2.flip(frame, 1)
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(frame)
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    if results.left_hand_landmarks:
        # NOTE(review): this is the only place a MediaPipe result object is
        # iterated, so it is presumably where the reported
        # "'NormalizedLandmarkList' object is not iterable" TypeError is raised.
        for hand_landmarks in results.left_hand_landmarks:
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
            # Collect (x, y) for landmarks 0..20, scaling x by frame.shape[1]
            # and y by frame.shape[0].
            keypoint_pos = []
            for i in range(21):
                x = hand_landmarks.landmark[i].x * frame.shape[1]
                y = hand_landmarks.landmark[i].y * frame.shape[0]
                keypoint_pos.append((x, y))
    cv2.imshow('MediaPipe Hands', frame)
    # Key code 27 (Esc) ends the loop.
    if cv2.waitKey(1) & 0xFF == 27:
        break
cap.release()

To access the iterable hand landmarks, we need to do the following.
for hand_landmarks in results.left_hand_landmarks.landmark
Also, make sure to set static_image_mode to False for videos, because consecutive video frames are related. You can check out this GitHub issue as well.

Related

How to extract skeleton only without video in mediapipe?

import cv2
import mediapipe as mp
import numpy as np
import sys
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose
mp_drawing_styles = mp.solutions.drawing_styles
#min_Tracking_confidence = 1 for higher accuracy
pose = mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=1)
#Import Video and Set codec
# The input video path is taken from the first command-line argument.
cap = cv2.VideoCapture(sys.argv[1])
# print("cap :", cap.shape)
fourcc = cv2.VideoWriter_fourcc(*'MP4V')
# NOTE(review): the paste lost its indentation; the nesting below is a
# reconstruction -- confirm against the original script.
# NOTE(review): TypeError is raised for a file that could not be opened;
# an I/O-style exception would describe the failure better.
if cap.isOpened() == False:
    print("Error opening video stream or file")
    raise TypeError
# Capture properties 3 and 4 are used as the output frame width and height.
frame_width = int(cap.get(3))
frame_height = int(cap.get(4))
# Split the input path at its last '/' into directory and file name, split the
# file name at '.', and build "<dir><stem>_annotated.<ext>" as the output path.
# NOTE(review): the two-name unpacking below needs exactly one '.' in the
# file name, otherwise it raises ValueError.
outdir, inputflnm = sys.argv[1][:sys.argv[1].rfind(
    '/')+1], sys.argv[1][sys.argv[1].rfind('/')+1:]
inflnm, inflext = inputflnm.split('.')
out_filename = f'{outdir}{inflnm}_annotated.{inflext}'
# out = cv2.VideoWriter(out_filename, cv2.VideoWriter_fourcc(
# 'M', 'J', 'P', 'G'), 10, (frame_width, frame_height))
out = cv2.VideoWriter(out_filename, fourcc, 30, (frame_width, frame_height))
while cap.isOpened():
    ret, image = cap.read()
    if not ret:
        break
    # Convert to RGB and mark the array read-only while the pose model runs,
    # then make it writable again and convert back to BGR for drawing.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = pose.process(image) #core
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    # Render detections
    # The landmarks are drawn onto `image`, which is still the decoded video
    # frame, so every written frame contains the video plus the skeleton.
    mp_drawing.draw_landmarks(
        image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
    out.write(image)
# NOTE(review): whether this call sat inside or after the loop cannot be told
# from the paste; it is placed after the loop here, where it would use the
# results of the last processed frame -- confirm.
mp_drawing.plot_landmarks(
    results.pose_world_landmarks, mp_pose.POSE_CONNECTIONS)
pose.close()
cap.release()
out.release()
Hello,
I would like to extract only the skeleton, without the video underneath it.
I changed the MediaPipe example code to use an input video instead of a still image,
and that worked:
the result was the video with the skeleton drawn on it.
Now I want to see only the skeleton, without the video.
I tried to remove the video but I could not.
I would appreciate any help!

multiple processing in dronekit not working

I am trying to write code for a drone that flies to multiple waypoints; the drone must not continue to the next waypoint until I show the color red to the camera.
Because the cv2 camera loop and the drone control run at the same time, my code runs very laggy, so I tried to modify it to use multiprocessing. When I run the new code, the multiprocessing doesn't work: it skips almost all of my code and goes straight to RTL mode.
from inspect import ArgInfo
from dronekit import connect, VehicleMode, LocationGlobalRelative
from pymavlink import mavutil
from numpy import loadtxt, array
from time import sleep
import sys
import cv2
import numpy as np
import multiprocessing
# Camera used for the colour check in gerak_drone().
cap = cv2.VideoCapture(0)
# Upper (hsv_a) and lower (hsv_b) HSV bounds passed to cv2.inRange().
# NOTE(review): the upper hue bound is 198 -- confirm it is within the hue
# range OpenCV produces for this image type.
hsv_a = np.array([198, 255, 255])
hsv_b = np.array([158, 68, 137])
# The colour counts as seen when more than this many mask pixels are non-zero.
treshold = 150
# Waypoint latitudes and longitudes, paired by index.
lat = [-35.3629722, -35.3629064, -35.3634361, -35.3638474]
lon = [149.1649709, 149.1655721, 149.1657331, 149.1639733]
#vehicle = connect('udp:127.0.0.1:14551',wait_ready=True)
# The vehicle connection is opened at import time, before any function runs.
vehicle = connect('udp:127.0.0.1:14551',wait_ready=True)
def arm_and_takeoff(aTargetAltitude):
    """Arm the vehicle, take off, and block until the target altitude is reached.

    The function waits (polling once per second) for three things in turn:
    the vehicle becoming armable, the vehicle reporting it is armed after
    GUIDED mode is selected, and the relative altitude reaching at least
    95% of ``aTargetAltitude``.

    Args:
        aTargetAltitude: Altitude passed to ``vehicle.simple_takeoff``.
    """
    print("Basic pre-arm checks")
    # Hold here until the autopilot says arming is allowed.
    while not vehicle.is_armable:
        print(" Waiting for vehicle to initialise...")
        sleep(1)

    print("Arming motors")
    # Copter should arm in GUIDED mode
    vehicle.mode = VehicleMode("GUIDED")
    vehicle.armed = True
    while not vehicle.armed:
        print(" Waiting for arming...")
        sleep(1)

    print("Taking off!")
    vehicle.simple_takeoff(aTargetAltitude)

    # Return just below the target altitude rather than waiting for an
    # exact match.
    close_enough = aTargetAltitude*0.95
    while True:
        print(" Altitude: ", vehicle.location.global_relative_frame.alt)
        if vehicle.location.global_relative_frame.alt >= close_enough:
            print("Reached target altitude")
            return
        sleep(1)
def dist(a, z):  # a = start, z = end
    """Return the straight-line distance between two positions.

    The result is the Euclidean norm of the latitude and longitude
    differences, expressed in the same raw units as the inputs (no conversion
    to metres is performed).

    Args:
        a: Start position; any object with ``lat`` and ``lon`` attributes.
        z: End position; any object with ``lat`` and ``lon`` attributes.

    Returns:
        The distance as a float.
    """
    d_lat = (a.lat - z.lat) ** 2
    d_long = (a.lon - z.lon) ** 2
    return (d_lat + d_long) ** 0.5
def gerak_drone():
    """Fly to each waypoint in ``lat``/``lon`` and wait there for the colour trigger.

    For every waypoint the function:

    1. sends the vehicle to the waypoint (third position argument fixed at 2),
    2. polls until ``dist`` to the waypoint drops below 0.0001, printing the
       remaining distance scaled by 100000, and
    3. shows the camera mask, grayscale and raw frames until either Esc is
       pressed or more than ``treshold`` mask pixels are non-zero.
    """
    for i, (wp_lat, wp_lon) in enumerate(zip(lat, lon)):
        print(i)
        wp = LocationGlobalRelative(wp_lat, wp_lon, 2)
        vehicle.simple_goto(wp)
        sleep(1)

        # Wait for arrival. The distance is computed once per pass and the
        # loop pauses briefly so it does not spin at full speed while the
        # vehicle is in transit.
        while True:
            remaining = dist(vehicle.location.global_relative_frame, wp)
            if remaining < 0.0001:
                break
            print(str(round(remaining * 100000, 2)))
            sleep(0.1)

        # Hold at the waypoint until the colour is seen or Esc is pressed.
        while True:
            ok, frame = cap.read()
            if not ok:
                # No frame was delivered; keep the windows responsive and
                # try again instead of crashing on a None frame.
                if cv2.waitKey(500) == 27:
                    break
                continue
            hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            mask = cv2.inRange(hsv, hsv_b, hsv_a)
            cv2.imshow("warna", mask)
            cv2.imshow("hitamPutih", gray)
            cv2.imshow("apa", frame)
            matched = cv2.countNonZero(mask)
            print(matched)
            if cv2.waitKey(500) == 27 or matched > treshold:
                break
# NOTE(review): the paste lost its indentation. The reconstruction below keeps
# the process setup inside the guard and the return-to-launch sequence at top
# level, which matches the reported behaviour -- confirm against the original.
# NOTE(review): the guard compares against "_main_" (single underscores).
# Python sets __name__ to "__main__" for a script, so this condition is never
# true and the whole guarded body is skipped.
if __name__ == "_main_":
    altitude = 2
    lat_distance = 1
    lon_distance = 1
    # NOTE(review): (altitude) is just the int 2, not a one-element tuple;
    # a tuple would be written (altitude,).
    p1 = multiprocessing.Process(target=arm_and_takeoff, args=(altitude))
    # NOTE(review): dist() reads .lat and .lon from both arguments, but plain
    # ints are passed here.
    p2 = multiprocessing.Process(target=dist, args=(lat_distance, lon_distance))
    p3 = multiprocessing.Process(target=gerak_drone)
    # All three processes are started before any is joined, so take-off and
    # the waypoint loop would run at the same time rather than in sequence.
    p1.start()
    p2.start()
    p3.start()
    p1.join()
    p2.join()
    p3.join()
# Return to launch, wait 20 seconds, then switch to LAND.
print("Coming back")
vehicle.mode = VehicleMode("RTL")
sleep(20)
vehicle.mode = VehicleMode("LAND")
Here is my terminal result

How can I increase the resolution of my input video for my real time Object Detection API?

I am having trouble increasing the resolution in my input video for real time object detection. I have tried increasing the input resolution of the video read in, but the output gives me an error.
I am streaming the video using a webcam program
The only dimensions that work are 640 x 480. Anything bigger gives me an output error. The resolution stays at 1920 x 1080 only up to the point where the frame window is shown; then it switches back to 640 x 480 and I get an error when the video is saved.
import numpy as np
import tensorflow as tf
from object_detection.utils import visualization_utils as vis_util
import cv2 as cv
from time import time
import serial
# Corner points of the region of interest, filled in by image_crop().
refPoints = []


# draw ROI
def image_crop(event, x, y, flags, param):
    """Mouse callback that records the two corners of the region of interest.

    Pressing the left button starts a new selection containing only the press
    position; releasing it appends the release position. ``flags`` and
    ``param`` are accepted but not used.
    """
    global refPoints
    if event == cv.EVENT_LBUTTONUP:
        refPoints.append((x, y))
    elif event == cv.EVENT_LBUTTONDOWN:
        refPoints = [(x, y)]
# run inference on single image
def run_inference_for_single_image(image, graph, sess):
    """Run the detection graph on one image and return its detections.

    The image is given a leading batch axis before being fed to the
    ``image_tensor:0`` placeholder.

    Args:
        image: The image array to run detection on.
        graph: Graph that is made the default while tensors are looked up.
        sess: Session used to evaluate the output tensors.

    Returns:
        A ``(boxes, scores, classes)`` tuple holding the first (only) batch
        entry of the ``detection_boxes:0``, ``detection_scores:0`` and
        ``detection_classes:0`` outputs.
    """
    output_names = ('detection_boxes:0', 'detection_scores:0', 'detection_classes:0')
    with graph.as_default():
        active_graph = tf.get_default_graph()
        fetches = [active_graph.get_tensor_by_name(name) for name in output_names]
        image_tensor = active_graph.get_tensor_by_name('image_tensor:0')
        batch = np.expand_dims(image, 0)
        rboxes, rscores, rclasses = sess.run(fetches, feed_dict={image_tensor: batch})
    return rboxes[0], rscores[0], rclasses[0]
def main():
    """Select a region of interest, then run detection on it frame by frame.

    Steps, as numbered in the body: load the frozen graph, open the video
    writer and serial port, let the user draw a rectangle on a frame, then
    loop over camera frames running inference on the cropped rectangle,
    writing b'1' or b'0' to the serial port depending on whether a class-1
    detection scored above 0.95, and saving every displayed frame.

    Returns:
        -1 when the ROI selection did not end with exactly two points;
        otherwise None.

    NOTE(review): the paste lost its indentation; the nesting below is a
    reconstruction -- confirm against the original script.
    """
    # 0 - load model
    PATH_TO_MODEL = 'C:\\frozen_inference_graph.pb'
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_MODEL, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
    sess = tf.Session(graph=detection_graph)
    # 1 - define video streamer & serial port
    streamer = 0
    # NOTE(review): the writer is created for 1920x1080 frames, but nothing in
    # this function asks the capture device for that size; if the camera
    # delivers a different size, the frames passed to out.write() will not
    # match the writer.
    out = cv.VideoWriter('output.avi', cv.VideoWriter_fourcc(*'XVID'), 20, (1920, 1080)) #previously 25.0, (640, 480)
    port = 'COM4'
    ser = serial.Serial(port, 9600)
    # 2 - draw region of interest (ROI)
    # NOTE(review): cv.CAP_DSHOW is the only argument here -- confirm this
    # opens the intended camera.
    cap = cv.VideoCapture(cv.CAP_DSHOW)
    # NOTE(review): `frame` is used here before anything in main() assigns it;
    # a frame-grabbing line (e.g. a cap.read() call) appears to be missing
    # from the paste.
    clone = frame.copy()
    cv.namedWindow('frame')
    cv.resizeWindow('frame', (1920, 1080))
    cv.setMouseCallback('frame', image_crop)
    print(frame.shape)#
    print('ROI selecting...')
    # 'r' restores the untouched copy of the frame, 'c' confirms the selection.
    # NOTE(review): waitKey is called separately for each branch, so a key
    # press consumed by the first call is not seen by the second.
    while True:
        cv.imshow('frame', frame)
        if cv.waitKey(1) & 0xFF == ord('r'):
            print('ROI selection reset.')
            frame = clone.copy()
        elif cv.waitKey(1) & 0xff == ord('c'):
            print('ROI selected.')
            cap.release()
            break
    if len(refPoints) == 2:
        # Show the chosen rectangle and wait for a key press before continuing.
        cv.rectangle(frame, refPoints[0], refPoints[1], (0, 255, 0), 2)
        cv.imshow('frame', frame)
        if cv.waitKey(0) & 0xFF == ord('q'):
            cap.release()
            cv.destroyAllWindows()
    else:
        print('only one ROI allowed.')
        cap.release()
        cv.destroyAllWindows()
        return -1
    # 3 - run inferences on ROI
    # t1/t2/t3 accumulate seconds spent reading, inferring and showing/saving;
    # i counts the frames that completed all three stages.
    t1 = 0
    t2 = 0
    t3 = 0
    i = 0
    cap = cv.VideoCapture(streamer)
    cv.namedWindow('frame')
    cv.resizeWindow('frame', (1920, 1080))
    while(True):
        # image reading & cropping
        t0 = time()
        ret, frame = cap.read()
        if frame is None:
            break
        # Crop rows by the y coordinates and columns by the x coordinates of
        # the two ROI corners.
        cropped = frame[refPoints[0][1]:refPoints[1][1], refPoints[0][0]:refPoints[1][0]]
        dt = time()-t0
        t1 += dt
        # image inference
        t0 = time()
        image = cropped.copy()
        boxes, scores, classes = run_inference_for_single_image(image, detection_graph, sess)
        # Keep only detections scoring above 0.95, then split them by class id.
        boxes = boxes[scores>0.95]
        classes = classes[scores>0.95]
        boxes1 = boxes[classes==1]
        boxes2 = boxes[classes==2]
        dt = time()-t0
        t2 += dt
        t0 = time()
        cv.rectangle(frame, refPoints[0], refPoints[1], (0, 255, 0), 2)
        # A class-1 detection redraws the ROI with a thicker rectangle in a
        # different colour and sends b'1'; otherwise b'0' is sent.
        if len(boxes1):
            #vis_util.draw_bounding_boxes_on_image_array(image, boxes)
            cv.rectangle(frame, refPoints[0], refPoints[1], (0, 0, 255), 4)
            ser.write(b'1')
        else:
            ser.write(b'0')
        #if len(boxes2):
        # cv.rectangle(frame, refPoints[0], refPoints[1], (255, 0, 0), 4)
        #print(frame.shape)
        #print(frame.shape)#
        cv.imshow('frame', frame)
        out.write(frame)
        dt = time()-t0
        t3 += dt
        i += 1
        if cv.waitKey(1) & 0xFF == ord('q'):
            break
    # NOTE(review): if the loop ends before any frame completes, i is 0 and
    # these divisions raise ZeroDivisionError.
    print("image reading: average %f sec/frame"%(t1/i))
    print("image processing: average %f sec/frame"%(t2/i))
    print("image showing/saving: average %f sec/frame"%(t3/i))
    ser.close()
    sess.close()
    cap.release()
    out.release()
    cv.destroyAllWindows()
if __name__ == '__main__':
    main()

Trying to take pictures with Coral camera with Coral edgeTPU dev board but it is really slow

To start with, I am not a developer, but a mere automation engineer that have worked a bit with coding in Java, python, C#, C++ and C.
I am trying to make a prototype that takes pictures and stores them, triggered by a digital pin on the board. At the moment I can take pictures using a switch, but it is really slow (around 3 seconds per image).
My complete system is going to be like this:
A product passes by on a conveyor and a photo cell triggers the board to take an image and store it. If an operator removes a product(because of bad quality) the image is stored in a different folder.
I started with the snapshot function shipped with Mendel and have tried to get rid of the overhead, but the GStreamer and pipeline parts confuse me a lot.
If someone could help me understand the supplied code, or show me how to write a minimal solution for taking an image, I would be grateful :)
I have tried to understand and use project-teachable and examples-camera from Google coral https://github.com/google-coral, but with no luck. I have had the best luck with the snapshot tool that uses snapshot.py that are referenced here https://coral.withgoogle.com/docs/camera/datasheet/#snapshot-tool
from periphery import GPIO
import time
import argparse
import contextlib
import fcntl
import os
import select
import sys
import termios
import threading
import gi
gi.require_version('Gst', '1.0')
gi.require_version('GstBase', '1.0')
from functools import partial
from gi.repository import GLib, GObject, Gst, GstBase
from PIL import Image
# Initialise GObject threading and GStreamer before any pipeline is built.
GObject.threads_init()
Gst.init(None)
# Frame size used by on_new_sample() when rebuilding the image from raw bytes.
WIDTH = 2592
HEIGHT = 1944
# NOTE(review): FILENAME_PREFIX, FILENAME_SUFFIX, CAMERA_INIT_QUERY_SYSFS_NODE,
# HDMI_SYSFS_NODE and SCREEN_SINK are not referenced by the code shown here.
FILENAME_PREFIX = 'img'
FILENAME_SUFFIX = '.png'
# sysfs node that main() opens and writes '2' to.
AF_SYSFS_NODE = '/sys/module/ov5645_camera_mipi_v2/parameters/ov5645_af'
CAMERA_INIT_QUERY_SYSFS_NODE = '/sys/module/ov5645_camera_mipi_v2/parameters/ov5645_initialized'
HDMI_SYSFS_NODE = '/sys/class/drm/card0/card0-HDMI-A-1/status'
# No of initial frames to throw away before camera has stabilized
SCRAP_FRAMES = 1
# Source (camera) caps values substituted into SRC_CAPS.
SRC_WIDTH = 2592
SRC_HEIGHT = 1944
SRC_RATE = '15/1'
SRC_ELEMENT = 'v4l2src'
# Sink caps values substituted into SINK_CAPS.
SINK_WIDTH = 2592
SINK_HEIGHT = 1944
# The appsink is looked up by its name ("appsink") in run_pipeline().
SINK_ELEMENT = ('appsink name=appsink sync=false emit-signals=true '
                'max-buffers=1 drop=true')
SCREEN_SINK = 'glimagesink sync=false'
# Sink that run_pipeline() substitutes for {screen_sink}.
FAKE_SINK = 'fakesink sync=false'
SRC_CAPS = 'video/x-raw,format=YUY2,width={width},height={height},framerate={rate}'
SINK_CAPS = 'video/x-raw,format=RGB,width={width},height={height}'
LEAKY_Q = 'queue max-size-buffers=1 leaky=downstream'
# Pipeline template: the source feeds a tee with two branches, one ending in
# {screen_sink} and one converting to the sink caps and ending in {sink_element}.
PIPELINE = '''
{src_element} ! {src_caps} ! {leaky_q} ! tee name=t
t. ! {leaky_q} ! {screen_sink}
t. ! {leaky_q} ! videoconvert ! {sink_caps} ! {sink_element}
'''
def on_bus_message(bus, message, loop):
    """Handle a message from the pipeline bus.

    Warnings and errors are written to stderr; an error or end-of-stream
    message also stops ``loop``. All other message types are ignored.

    Returns:
        Always True.
    """
    msg_type = message.type
    if msg_type == Gst.MessageType.WARNING:
        warning, detail = message.parse_warning()
        sys.stderr.write('Warning: %s: %s\n' % (warning, detail))
        return True
    if msg_type == Gst.MessageType.ERROR:
        error, detail = message.parse_error()
        sys.stderr.write('Error: %s: %s\n' % (error, detail))
    # Both end-of-stream and errors end the main loop.
    if msg_type in (Gst.MessageType.EOS, Gst.MessageType.ERROR):
        loop.quit()
    return True
def on_new_sample(sink, snapinfo):
    """appsink ``new-sample`` callback: save the frame if ``snapinfo`` asks for it.

    ``snapinfo.save_frame()`` decides whether this frame is kept. A kept frame
    is pulled from the sink, interpreted as raw RGB of size WIDTH x HEIGHT and
    saved under the name returned by ``snapinfo.get_filename()``.

    Args:
        sink: The appsink that emitted the signal.
        snapinfo: Helper providing ``save_frame()`` and ``get_filename()``.

    Returns:
        ``Gst.FlowReturn.OK`` in every case.
    """
    if not snapinfo.save_frame():
        # Throw away the frame
        return Gst.FlowReturn.OK

    sample = sink.emit('pull-sample')
    buf = sample.get_buffer()
    mapped, mapinfo = buf.map(Gst.MapFlags.READ)
    if not mapped:
        # Nothing was mapped, so there is nothing to save or unmap.
        return Gst.FlowReturn.OK

    try:
        imgfile = snapinfo.get_filename()
        img = Image.frombytes('RGB', (WIDTH, HEIGHT), mapinfo.data, 'raw')
        try:
            img.save(imgfile)
        finally:
            img.close()
    finally:
        # Release the mapping even when building or saving the image fails.
        buf.unmap(mapinfo)
    return Gst.FlowReturn.OK
def run_pipeline(snapinfo):
    """Build the capture pipeline, run it until the main loop quits, then tear it down.

    The pipeline text is produced from the module-level templates, with
    FAKE_SINK used for the screen branch. The appsink's ``new-sample`` signal
    is routed to ``on_new_sample`` and bus messages to ``on_bus_message``.

    Args:
        snapinfo: Helper passed to ``on_new_sample``; it is also handed the
            main loop through ``connect_loop`` so it can stop the run.
    """
    src_caps = SRC_CAPS.format(width=SRC_WIDTH, height=SRC_HEIGHT, rate=SRC_RATE)
    sink_caps = SINK_CAPS.format(width=SINK_WIDTH, height=SINK_HEIGHT)
    description = PIPELINE.format(
        leaky_q=LEAKY_Q,
        src_element=SRC_ELEMENT,
        src_caps=src_caps,
        sink_caps=sink_caps,
        sink_element=SINK_ELEMENT,
        screen_sink=FAKE_SINK)
    pipeline = Gst.parse_launch(description)

    appsink = pipeline.get_by_name('appsink')
    appsink.connect('new-sample', partial(on_new_sample, snapinfo=snapinfo))
    loop = GObject.MainLoop()

    # Set up a pipeline bus watch to catch errors.
    bus = pipeline.get_bus()
    bus.add_signal_watch()
    bus.connect('message', on_bus_message, loop)

    # Connect the loop to the snaphelper
    snapinfo.connect_loop(loop)

    # Run pipeline.
    pipeline.set_state(Gst.State.PLAYING)
    try:
        loop.run()
    except KeyboardInterrupt:
        # Ctrl-C simply ends the run.
        pass
    except Exception as err:
        # Report unexpected failures instead of hiding them, but still let
        # the caller continue as before.
        sys.stderr.write('Main loop stopped: %s\n' % err)
    finally:
        # Clean up: stop the pipeline and drain pending main-context events.
        pipeline.set_state(Gst.State.NULL)
        while GLib.MainContext.default().iteration(False):
            pass
class SnapHelper:
    """Decide which frames are saved and under which file name.

    Args:
        sysfs: Open file object stored on the instance (only used by the
            commented-out autofocus helpers).
        prefix: Leading part of every generated file name.
        oneshot: When truthy, one frame is saved and the connected main loop
            is asked to quit.
        suffix: File extension without the dot.
    """

    def __init__(self, sysfs, prefix='img', oneshot=True, suffix='jpg'):
        self.prefix = prefix
        self.oneshot = oneshot
        self.suffix = suffix
        self.snap_it = oneshot
        self.num = 0
        self.scrapframes = SCRAP_FRAMES
        self.sysfs = sysfs
        # Set by connect_loop(); None until then so save_frame() can tell
        # whether there is a loop to stop.
        self.loop = None

    def get_filename(self):
        """Return the next ``<prefix><NNNN>.<suffix>`` name that does not exist yet.

        The counter advances past every name that is tried, so the same name
        is never returned twice by one instance.
        """
        while True:
            filename = self.prefix + str(self.num).zfill(4) + '.' + self.suffix
            self.num = self.num + 1
            if not os.path.exists(filename):
                break
        return filename

    #def check_af(self):
    #try:
    # self.sysfs.seek(0)
    # v = self.sysfs.read()
    # if int(v) != 0x10:
    # print('NO Focus')
    #except:
    # pass
    # def refocus(self):
    # try:#
    # self.sysfs.write('1')
    # self.sysfs.flush()
    # except:
    # pass

    def save_frame(self):
        """Return True when the current frame should be saved.

        The first ``scrapframes`` calls return False so the camera can
        stabilise. After that, the call returns True once per pending
        snapshot request. In oneshot mode the connected main loop, if any,
        is asked to quit.
        """
        # We always want to throw away the initial frames to let the
        # camera stabilize. This seemed empirically to be the right number
        # when running on desktop.
        if self.scrapframes > 0:
            self.scrapframes = self.scrapframes - 1
            return False
        if self.snap_it:
            self.snap_it = False
            retval = True
        else:
            retval = False
        # Without a connected loop there is nothing to stop; skipping the
        # call avoids an AttributeError when connect_loop() was never used.
        if self.oneshot and self.loop is not None:
            self.loop.quit()
        return retval

    def connect_loop(self, loop):
        """Remember the main loop that save_frame() stops in oneshot mode."""
        self.loop = loop
def take_picture(snap):
    """Run the capture pipeline once for ``snap`` and print the elapsed seconds.

    The start time is taken from a monotonic clock and is not rounded, so the
    printed duration is not distorted by rounding or by system clock changes.

    Args:
        snap: Helper object handed to ``run_pipeline``.
    """
    start_time = time.monotonic()
    run_pipeline(snap)
    print(time.monotonic() - start_time)
def main():
    """Poll GPIO 138 and take one picture on every low-to-high transition.

    NOTE(review): the paste lost its indentation; the nesting below is a
    reconstruction -- confirm against the original script.
    """
    button = GPIO(138, "in")
    last_state = False
    with open(AF_SYSFS_NODE, 'w+') as sysfs:
        # NOTE(review): the third positional argument is the `oneshot`
        # parameter; the string 'oneshot' is truthy, so it acts like True.
        # This first instance is replaced before it is ever used.
        snap = SnapHelper(sysfs, 'test', 'oneshot', 'jpg')
        sysfs.write('2')
        # NOTE(review): this loop polls the pin continuously with no pause.
        while 1:
            button_state = button.read()
            # Trigger only on the edge: pressed now, not pressed last time.
            if(button_state==True and last_state == False):
                # Each press builds a fresh helper, and take_picture() runs
                # run_pipeline(), which creates, starts and tears down a whole
                # pipeline and discards SCRAP_FRAMES frames -- presumably a
                # large part of the per-image delay.
                snap = SnapHelper(sysfs, 'test', 'oneshot', 'jpg')
                take_picture(snap)
            last_state = button_state
if __name__== "__main__":
    main()
    sys.exit()
Output is what i expect, but it is slow.
I switched to a USB-webcam and used the pygame library instead.

Why does OpenCV's Meanshift tracking algorithm only track object the first time?

I am running the meanshift tracking algorithm to track objects in a live stream(with webcam) in OpenCV however the algorithm only works the first time it is run and does not work when I run the program again unless I restart my computer. Why is this so?
Algorithm taken from: https://docs.opencv.org/trunk/db/df8/tutorial_py_meanshift.html
import numpy as np
import cv2
cap = cv2.VideoCapture(0)
# take first frame of the video
# NOTE(review): `ret` is not checked here; if the read fails, `frame` is
# unusable for the slicing below.
ret,frame = cap.read()
# setup initial location of window
r,h,c,w = 250,90,400,125 # simply hardcoded the values
track_window = (c,r,w,h)
# set up the ROI for tracking
roi = frame[r:r+h, c:c+w]
hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
# Mask the ROI to the given HSV range, then build and normalise (0..255) a
# 180-bin histogram of channel 0 over the masked pixels.
mask = cv2.inRange(hsv_roi, np.array((0., 60.,32.)), np.array((180.,255.,255.)))
roi_hist = cv2.calcHist([hsv_roi],[0],mask,[180],[0,180])
cv2.normalize(roi_hist,roi_hist,0,255,cv2.NORM_MINMAX)
# Setup the termination criteria, either 10 iteration or move by atleast 1 pt
term_crit = ( cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1 )
# NOTE(review): the paste lost its indentation; the nesting below is a
# reconstruction -- confirm against the original script.
while(1):
    ret ,frame = cap.read()
    if ret == True:
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        dst = cv2.calcBackProject([hsv],[0],roi_hist,[0,180],1)
        # apply meanshift to get the new location
        # `ret` is reused for meanShift's first return value from here on.
        ret, track_window = cv2.meanShift(dst, track_window, term_crit)
        # Draw it on image
        x,y,w,h = track_window
        img2 = cv2.rectangle(frame, (x,y), (x+w,y+h), 255,2)
        cv2.imshow('img2',img2)
        k = cv2.waitKey(60) & 0xff
        # Esc (27) stops the loop; any other value of k saves the frame under
        # a one-character file name.
        # NOTE(review): presumably this branch also runs when no key was
        # pressed, writing a file on every frame -- confirm.
        if k == 27:
            break
        else:
            cv2.imwrite(chr(k)+".jpg",img2)
    else:
        break
# NOTE(review): the camera is released only when the loop ends normally; if
# the script is interrupted or raises earlier, cap.release() is never reached.
cv2.destroyAllWindows()
cap.release()