Skip to content

Instantly share code, notes, and snippets.

@lanzani
Last active July 19, 2025 12:13
Show Gist options
  • Save lanzani/f85175d8fbdafcabb7d480dd1bb769d9 to your computer and use it in GitHub Desktop.
Use mediapipe to find multiple person 3d keypoints
import cv2
import numpy as np
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe.framework.formats import landmark_pb2
# Model available to download here: https://developers.google.com/mediapipe/solutions/vision/pose_landmarker#models
model_path = "pose_landmarker_full.task"
# 0 selects the default webcam; cv2.VideoCapture also accepts a file path/URL.
video_source = 0
# Maximum number of people the landmarker will detect in a single frame.
num_poses = 4
# Detection / landmark-presence / tracking thresholds, each in [0.0, 1.0].
min_pose_detection_confidence = 0.5
min_pose_presence_confidence = 0.5
min_tracking_confidence = 0.5
def draw_landmarks_on_image(rgb_image, detection_result):
    """Draw the pose skeleton of every detected person onto a copy of the frame.

    Args:
        rgb_image: RGB frame as a numpy array (H, W, 3) — assumed from the
            caller's use of mp.Image.numpy_view(); TODO confirm channel order.
        detection_result: PoseLandmarkerResult whose ``pose_landmarks`` is a
            list with one landmark list per detected person.

    Returns:
        A new numpy array with the landmarks drawn; the input is not modified.
    """
    annotated_image = np.copy(rgb_image)
    # Iterate the detected poses directly instead of range(len(...)) indexing.
    for pose_landmarks in detection_result.pose_landmarks:
        # drawing_utils.draw_landmarks expects a NormalizedLandmarkList proto,
        # so convert the task-API landmark objects first.
        pose_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
        pose_landmarks_proto.landmark.extend([
            landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z)
            for landmark in pose_landmarks
        ])
        mp.solutions.drawing_utils.draw_landmarks(
            annotated_image,
            pose_landmarks_proto,
            mp.solutions.pose.POSE_CONNECTIONS,
            mp.solutions.drawing_styles.get_default_pose_landmarks_style())
    return annotated_image
# Latest annotated BGR frame produced by the async result callback; None until
# the first detection result arrives. Read by the display loop below.
to_window = None
# Timestamp (ms) of the most recent rendered result; used to drop any result
# that arrives out of order.
last_timestamp_ms = 0
def print_result(detection_result: vision.PoseLandmarkerResult, output_image: mp.Image,
                 timestamp_ms: int):
    """Async LIVE_STREAM callback: publish the newest annotated frame.

    Results older than the last one rendered are dropped, so the preview
    window never steps backwards in time. The annotated frame is converted
    to BGR and stored in the module-level ``to_window`` for the display loop.
    """
    global to_window, last_timestamp_ms
    if timestamp_ms < last_timestamp_ms:
        # Out-of-order result from the async pipeline — ignore it.
        return
    last_timestamp_ms = timestamp_ms
    annotated_rgb = draw_landmarks_on_image(output_image.numpy_view(), detection_result)
    to_window = cv2.cvtColor(annotated_rgb, cv2.COLOR_RGB2BGR)
base_options = python.BaseOptions(model_asset_path=model_path)
# LIVE_STREAM mode delivers results asynchronously through result_callback
# instead of returning them from detect_async().
options = vision.PoseLandmarkerOptions(
    base_options=base_options,
    running_mode=vision.RunningMode.LIVE_STREAM,
    num_poses=num_poses,
    min_pose_detection_confidence=min_pose_detection_confidence,
    min_pose_presence_confidence=min_pose_presence_confidence,
    min_tracking_confidence=min_tracking_confidence,
    output_segmentation_masks=False,  # masks are not needed for drawing skeletons
    result_callback=print_result
)
with vision.PoseLandmarker.create_from_options(options) as landmarker:
    # Use OpenCV's VideoCapture to start capturing from the webcam.
    cap = cv2.VideoCapture(video_source)
    try:
        # Read the latest frame from the camera until it closes or 'q' is hit.
        while cap.isOpened():
            success, image = cap.read()
            if not success:
                print("Image capture failed.")
                break
            # Convert the BGR frame from OpenCV to a MediaPipe Image (RGB).
            mp_image = mp.Image(
                image_format=mp.ImageFormat.SRGB,
                data=cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            # LIVE_STREAM mode requires a monotonically increasing timestamp (ms).
            timestamp_ms = int(cv2.getTickCount() / cv2.getTickFrequency() * 1000)
            landmarker.detect_async(mp_image, timestamp_ms)
            # Results arrive asynchronously via print_result; display the most
            # recent annotated frame once one exists.
            if to_window is not None:
                cv2.imshow("MediaPipe Pose Landmark", to_window)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the camera and close windows even if the loop raised.
        cap.release()
        cv2.destroyAllWindows()
@Rick-G-P
Copy link

Rick-G-P commented Jun 4, 2025

Hi Federico — many thanks for posting this; really helpful. Just to be on the safe side (as you do not specify a licence): do I have your permission to re-use and extend this code in my own application? I will of course credit you as the originator. Would you also like me to clarify which parts of the code are yours and which I added (as per Apache 2.0)? My extensions concern outputting landmark data to a .csv file and dealing with cases where two people are close together and only one is detected.

Sure

Great, thanks :)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment