Skip to content

Instantly share code, notes, and snippets.

@ochilab
Last active June 17, 2025 09:36
Show Gist options
  • Save ochilab/8d1e78557f75d73006c3c994e7740fd0 to your computer and use it in GitHub Desktop.
Save ochilab/8d1e78557f75d73006c3c994e7740fd0 to your computer and use it in GitHub Desktop.
SAM(Segment Anything)をとりあえず動かした
import cv2
import torch
from segment_anything import sam_model_registry, SamPredictor
import numpy as np
from google.colab.patches import cv2_imshow
# Load the SAM model (adjust the checkpoint path as needed).
sam_checkpoint = "sam_vit_h_4b8939.pth"
model_type = "vit_h"
device = "cuda" if torch.cuda.is_available() else "cpu"
sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to(device=device)
predictor = SamPredictor(sam)

# Open the input video.
video_path = "IMG_4397.mov"
cap = cv2.VideoCapture(video_path)
try:
    # Fail loudly instead of silently producing no output when the file is
    # missing or cannot be decoded.
    if not cap.isOpened():
        raise RuntimeError(f"Could not open video: {video_path}")

    while True:
        ret, frame = cap.read()
        if not ret:
            # End of stream (or a read error): stop processing.
            break

        # Rotate the frame to portrait orientation.
        frame = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE)

        # Convert BGR (OpenCV's channel order) to RGB, which SAM expects.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Hand the current frame to the predictor.
        predictor.set_image(rgb_frame)

        # Use the frame centre as a single foreground point prompt, i.e.
        # segment whatever object sits in the middle of the image.
        # Coordinates are given as (x, y).
        h, w = rgb_frame.shape[:2]
        input_point = np.array([[w // 2, h // 2]])
        input_label = np.array([1])  # 1 = foreground (the thing to segment)

        masks, scores, logits = predictor.predict(
            point_coords=input_point,
            point_labels=input_label,
            multimask_output=True,
        )

        # Keep only the candidate mask with the highest score.
        best_mask = masks[np.argmax(scores)]

        # Paint the mask green on a black canvas and blend it over the frame.
        # The canvas is black outside the mask, so those pixels end up at
        # 70% of their original brightness.
        mask_overlay = np.zeros_like(frame)
        mask_overlay[best_mask] = [0, 255, 0]
        annotated_frame = cv2.addWeighted(frame, 0.7, mask_overlay, 0.3, 0)

        # Colab replacement for cv2.imshow: renders the frame inline.
        cv2_imshow(annotated_frame)
finally:
    # Always release the capture handle, even if segmentation raised.
    cap.release()
    cv2.destroyAllWindows()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment