-
-
Save lanzani/f85175d8fbdafcabb7d480dd1bb769d9 to your computer and use it in GitHub Desktop.
import cv2
import numpy as np
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe.framework.formats import landmark_pb2

# Model available to download here:
# https://developers.google.com/mediapipe/solutions/vision/pose_landmarker#models
model_path = "pose_landmarker_full.task"

# 0 selects the default webcam; use another index or a file path for other sources.
video_source = 0

# Maximum number of people the landmarker will try to detect per frame.
num_poses = 4
min_pose_detection_confidence = 0.5
min_pose_presence_confidence = 0.5
min_tracking_confidence = 0.5
def draw_landmarks_on_image(rgb_image, detection_result):
    """Draw the pose skeleton of every detected person onto a copy of the frame.

    Args:
        rgb_image: RGB frame as a numpy array (as returned by
            ``mp.Image.numpy_view()``).
        detection_result: A ``vision.PoseLandmarkerResult`` whose
            ``pose_landmarks`` holds one landmark list per detected person.

    Returns:
        A new numpy array with the landmarks and connections drawn; the
        input frame is not modified.
    """
    annotated_image = np.copy(rgb_image)

    # One landmark list per detected person.
    for pose_landmarks in detection_result.pose_landmarks:
        # The drawing utils expect the legacy NormalizedLandmarkList proto,
        # so convert the task-API landmarks before drawing.
        pose_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
        pose_landmarks_proto.landmark.extend([
            landmark_pb2.NormalizedLandmark(
                x=landmark.x,
                y=landmark.y,
                z=landmark.z) for landmark in pose_landmarks
        ])
        mp.solutions.drawing_utils.draw_landmarks(
            annotated_image,
            pose_landmarks_proto,
            mp.solutions.pose.POSE_CONNECTIONS,
            mp.solutions.drawing_styles.get_default_pose_landmarks_style())
    return annotated_image
# Latest annotated BGR frame, shared between the async result callback and
# the display loop below.
to_window = None
last_timestamp_ms = 0


def print_result(detection_result: vision.PoseLandmarkerResult, output_image: mp.Image,
                 timestamp_ms: int) -> None:
    """LIVE_STREAM result callback: render landmarks into ``to_window``.

    Results from ``detect_async`` may arrive out of order; any result older
    than the newest one already rendered is dropped so the display never
    steps backwards in time.

    Args:
        detection_result: Landmarker output for one frame.
        output_image: The frame the result refers to (RGB).
        timestamp_ms: The timestamp passed to ``detect_async`` for this frame.
    """
    global to_window
    global last_timestamp_ms
    if timestamp_ms < last_timestamp_ms:
        # Stale result — a newer frame was already rendered.
        return
    last_timestamp_ms = timestamp_ms
    # Annotate in RGB, then convert to BGR for cv2.imshow.
    to_window = cv2.cvtColor(
        draw_landmarks_on_image(output_image.numpy_view(), detection_result),
        cv2.COLOR_RGB2BGR)
# Configure the landmarker for live-stream (async) operation; results are
# delivered to print_result rather than returned synchronously.
base_options = python.BaseOptions(model_asset_path=model_path)
options = vision.PoseLandmarkerOptions(
    base_options=base_options,
    running_mode=vision.RunningMode.LIVE_STREAM,
    num_poses=num_poses,
    min_pose_detection_confidence=min_pose_detection_confidence,
    min_pose_presence_confidence=min_pose_presence_confidence,
    min_tracking_confidence=min_tracking_confidence,
    output_segmentation_masks=False,
    result_callback=print_result,
)

with vision.PoseLandmarker.create_from_options(options) as landmarker:
    # Use OpenCV's VideoCapture to start capturing from the webcam.
    cap = cv2.VideoCapture(video_source)
    try:
        # Read the latest frame from the camera until capture fails or
        # the user presses 'q'.
        while cap.isOpened():
            success, image = cap.read()
            if not success:
                print("Image capture failed.")
                break

            # OpenCV delivers BGR; MediaPipe expects SRGB, so convert first.
            mp_image = mp.Image(
                image_format=mp.ImageFormat.SRGB,
                data=cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

            # LIVE_STREAM mode requires a monotonically increasing timestamp
            # in milliseconds for each frame.
            timestamp_ms = int(cv2.getTickCount() / cv2.getTickFrequency() * 1000)
            landmarker.detect_async(mp_image, timestamp_ms)

            # Show the most recent annotated frame produced by the callback
            # (may lag the capture by a frame or two).
            if to_window is not None:
                cv2.imshow("MediaPipe Pose Landmark", to_window)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the camera and window even if the loop raised.
        cap.release()
        cv2.destroyAllWindows()
hey @lanzani ,
Thanks for sharing the script! I tried running it on my mac and when I have 2 people in camera frame, the skeletal landmarks keep switching between the 2 :(
based on what I read from this post and also the model info document , Pose Landmarker from mediapipe cannot detect multiple people in a single image. Even though you can set the num_poses parameter in options, the model itself only works when there is a single person present in the image.
just curious if the script worked on your end to output more than 1 person's skeletal landmarks?
Hi Federico,
Many thanks for posting this. Really helpful. Just to be on the safe side (as you do not specify a licence) - do I have your permission to re-use and extend this code in my own application? I will of course credit yourself as the originator. Would you also like me to clarify which parts of the code are yours and which I added (as per Apache 2.0)?
My extensions concern outputting landmark data to a .csv file and dealing with cases where two people are close together and only one is detected.
hey @lanzani , Thanks for sharing the script! I tried running it on my mac and when I have 2 people in camera frame, the skeletal landmarks keep switching between the 2 :(
I had the same problem with OpenPose. I ended up writing a postprocessor which addressed the switching by matching the keypoints for each person to the best fit in the previous frame, among other tweaks. I will share this when I have tidied it up!
based on what I read from this post and also the model info document , Pose Landmarker from mediapipe cannot detect multiple people in a single image. Even though you can set the num_poses parameter in options, the model itself only works when there is a single person present in the image.
just curious if the script worked on your end to output more than 1 person's skeletal landmarks?
I believe MediaPipe can detect multiple people in a single image but, as with video, only if they are far enough apart. I haven't quantified 'far enough', but it seems to be about the width of a person in the image.
hey @lanzani , Thanks for sharing the script! I tried running it on my mac and when I have 2 people in camera frame, the skeletal landmarks keep switching between the 2 :(
based on what I read from this post and also the model info document , Pose Landmarker from mediapipe cannot detect multiple people in a single image. Even though you can set the num_poses parameter in options, the model itself only works when there is a single person present in the image.
just curious if the script worked on your end to output more than 1 person's skeletal landmarks?
Hello! From what I remember, it worked. Keep in mind that I'm not developing with mediapipe since then, so probably something has changed :)
Hi Federico, Many thanks for posting this. Really helpful. Just to be on the safe side (as you do not specify a licence) - do I have your permission to re-use and extend this code in my own application? I will of course credit yourself as the originator. Would you also like me to clarify which parts of the code are yours and which I added (as per Apache 2.0)? My extensions concern outputting landmark data to a .csv file and dealing with cases where two people are close together and only one is detected,
Sure
hey @lanzani , Thanks for sharing the script! I tried running it on my mac and when I have 2 people in camera frame, the skeletal landmarks keep switching between the 2 :(
I had the same problem with OpenPose. I ended up writing a postprocessor which addressed the switching by matching the keypoints for each person to the best fit in the previous frame, among other tweaks. I will share this when I have tidied it up!
based on what I read from this post and also the model info document , Pose Landmarker from mediapipe cannot detect multiple people in a single image. Even though you can set the num_poses parameter in options, the model itself only works when there is a single person present in the image.
just curious if the script worked on your end to output more than 1 person's skeletal landmarks?
I believe MediaPipe can detect multiple people in a single image but, as with video, only if they are far enough apart. I haven't quantified 'far enough', but it seems to be about the width of a person in the image.
Unfortunately I'm not developing with mediapipe anymore, so I'm not much help :)
Hi Federico, Many thanks for posting this. Really helpful. Just to be on the safe side (as you do not specify a licence) - do I have your permission to re-use and extend this code in my own application? I will of course credit yourself as the originator. Would you also like me to clarify which parts of the code are yours and which I added (as per Apache 2.0)? My extensions concern outputting landmark data to a .csv file and dealing with cases where two people are close together and only one is detected,
Sure
Great, thanks :)
Did it work? I'm going to convert it into a MediaPipe plugin for Unity.