Skip to content

Instantly share code, notes, and snippets.

@huangsam
Created February 1, 2026 17:15
Show Gist options
  • Select an option

  • Save huangsam/f3398c03eb2ea4ce06fb07e2e456b132 to your computer and use it in GitHub Desktop.

Select an option

Save huangsam/f3398c03eb2ea4ce06fb07e2e456b132 to your computer and use it in GitHub Desktop.
Scenery detection
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include "scenery_detector.cpp"
namespace py = pybind11;
// Python extension module `_scenic_detection`.
// Exposes SceneryChangeDetector and its SceneChangeInfo result struct to
// Python. Requires pybind11/stl.h (included above) so std::vector members
// convert to/from Python lists automatically.
PYBIND11_MODULE(_scenic_detection, m) {
m.doc() = "Scenic change detection for videos using OpenCV and optical flow";
// Binding for SceneChangeInfo struct
// All fields are read-write; snake_case Python names map onto the C++
// camelCase members.
py::class_<SceneryChangeDetector::SceneChangeInfo>(m, "SceneChangeInfo")
.def(py::init<>())
.def_readwrite("frame_indices", &SceneryChangeDetector::SceneChangeInfo::frameIndices)
.def_readwrite("timestamps", &SceneryChangeDetector::SceneChangeInfo::timestamps)
.def_readwrite("scores", &SceneryChangeDetector::SceneChangeInfo::scores)
.def_readwrite("fps", &SceneryChangeDetector::SceneChangeInfo::fps)
.def_readwrite("total_frames", &SceneryChangeDetector::SceneChangeInfo::totalFrames)
// Human-readable summary for the Python REPL.
.def("__repr__", [](const SceneryChangeDetector::SceneChangeInfo& self) {
return "<SceneChangeInfo frames=" + std::to_string(self.frameIndices.size()) +
" fps=" + std::to_string(self.fps) + ">";
});
// Binding for SceneryChangeDetector class
// Defaults below mirror the C++ constructor defaults so the two stay in sync.
py::class_<SceneryChangeDetector>(m, "SceneryChangeDetector")
.def(py::init<float, float, int, float>(),
py::arg("histogram_threshold") = 0.15f,
py::arg("frame_diff_threshold") = 20.0f,
py::arg("blur_kernel") = 5,
py::arg("downscale_factor") = 0.5f,
"Initialize the scenery change detector with tuning parameters")
// Full-video scan returning only the frame indices of detected changes.
.def("detect_scene_changes", &SceneryChangeDetector::detectSceneChanges,
py::arg("video_path"),
py::arg("histogram_weight") = 0.4f,
py::arg("optical_weight") = 0.6f,
"Detect frame indices where scenery changes occur\n\n"
"Args:\n"
" video_path: Path to the video file\n"
" histogram_weight: Weight for histogram distance (0.0-1.0)\n"
" optical_weight: Weight for optical flow magnitude (0.0-1.0)\n"
"Returns:\n"
" List of frame indices where changes detected")
// Richer variant: returns SceneChangeInfo with timestamps and scores.
.def("analyze_video", &SceneryChangeDetector::analyzeVideo,
py::arg("video_path"),
py::arg("histogram_weight") = 0.4f,
py::arg("optical_weight") = 0.6f,
py::arg("return_scores") = true,
"Analyze video and return detailed change information\n\n"
"Args:\n"
" video_path: Path to the video file\n"
" histogram_weight: Weight for histogram distance\n"
" optical_weight: Weight for optical flow magnitude\n"
" return_scores: Whether to compute change scores\n"
"Returns:\n"
" SceneChangeInfo object with frames, timestamps, and fps")
// Pairwise metric helpers, exposed for experimentation from Python.
// NOTE(review): these take cv::Mat — a numpy<->cv::Mat type caster must be
// available at module import time for the numpy-array usage the docstrings
// describe; confirm the build links one in.
.def("compute_histogram_distance", &SceneryChangeDetector::computeHistogramDistance,
py::arg("frame1"),
py::arg("frame2"),
"Compute Bhattacharyya distance between two frames\n\n"
"Args:\n"
" frame1: First frame as numpy array (BGR)\n"
" frame2: Second frame as numpy array (BGR)\n"
"Returns:\n"
" Distance score (0.0 = identical, ~1.0 = very different)")
.def("compute_optical_flow_magnitude", &SceneryChangeDetector::computeOpticalFlowMagnitude,
py::arg("frame1"),
py::arg("frame2"),
"Compute mean optical flow magnitude between two frames\n\n"
"Args:\n"
" frame1: First frame as numpy array (BGR)\n"
" frame2: Second frame as numpy array (BGR)\n"
"Returns:\n"
" Mean magnitude of optical flow")
.def("compute_frame_difference", &SceneryChangeDetector::computeFrameDifference,
py::arg("frame1"),
py::arg("frame2"),
"Compute RMSE-based frame difference\n\n"
"Args:\n"
" frame1: First frame as numpy array (BGR)\n"
" frame2: Second frame as numpy array (BGR)\n"
"Returns:\n"
" Root mean squared error between frames")
// Plain accessor passthroughs for the tuning parameters.
.def("set_histogram_threshold", &SceneryChangeDetector::setHistogramThreshold)
.def("set_frame_diff_threshold", &SceneryChangeDetector::setFrameDiffThreshold)
.def("set_blur_kernel_size", &SceneryChangeDetector::setBlurKernelSize)
.def("set_downscale_factor", &SceneryChangeDetector::setDownscaleFactor)
.def("get_histogram_threshold", &SceneryChangeDetector::getHistogramThreshold)
.def("get_frame_diff_threshold", &SceneryChangeDetector::getFrameDiffThreshold)
.def("get_blur_kernel_size", &SceneryChangeDetector::getBlurKernelSize)
.def("get_downscale_factor", &SceneryChangeDetector::getDownscaleFactor);
}
#!/bin/bash
# Launcher for the short-video scenic-detection test.
# Prepares dynamic-library paths, then delegates to uv.

# macOS only: the compiled extension links against Homebrew LLVM's libc++,
# so make that directory visible to the dynamic loader.
if [[ "$OSTYPE" == "darwin"* ]]; then
    export DYLD_LIBRARY_PATH="/opt/homebrew/opt/llvm/lib/c++${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"
fi

# Resolve the directory containing this script and run from there so the
# relative test path below works regardless of the caller's cwd.
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
cd "$script_dir"

# Forward all arguments; filter noisy dyld/objc diagnostics out of stderr
# while leaving everything else intact.
exec uv run python test_short_video.py "$@" 2> >(grep -v -e "^dyld\[" -e "^objc\[" >&2)
#include <opencv2/opencv.hpp>
#include <opencv2/video/tracking.hpp>

#include <algorithm>
#include <cmath>
#include <stdexcept>
#include <string>
#include <vector>
class SceneryChangeDetector {
private:
// Configuration parameters
float histogramThreshold;
float frameDiffThreshold;
int blurKernelSize;
float downscaleFactor;
public:
SceneryChangeDetector(float hist_thresh = 0.15f,
float frame_diff_thresh = 20.0f,
int blur_kernel = 5,
float downscale = 0.5f)
: histogramThreshold(hist_thresh),
frameDiffThreshold(frame_diff_thresh),
blurKernelSize(blur_kernel),
downscaleFactor(downscale) {
// Ensure blur kernel is odd
if (blurKernelSize % 2 == 0) {
blurKernelSize++;
}
}
// Compute histogram comparison score between two frames
float computeHistogramDistance(const cv::Mat& frame1, const cv::Mat& frame2) {
cv::Mat hsv1, hsv2;
cv::cvtColor(frame1, hsv1, cv::COLOR_BGR2HSV);
cv::cvtColor(frame2, hsv2, cv::COLOR_BGR2HSV);
// Compute histograms
int hbins = 32, sbins = 32;
int histSize[] = { hbins, sbins };
float hrange[] = { 0, 180 };
float srange[] = { 0, 256 };
const float* ranges[] = { hrange, srange };
int channels[] = { 0, 1 };
cv::Mat hist1, hist2;
cv::calcHist(&hsv1, 1, channels, cv::Mat(), hist1, 2, histSize, ranges);
cv::calcHist(&hsv2, 1, channels, cv::Mat(), hist2, 2, histSize, ranges);
cv::normalize(hist1, hist1, 1, 0, cv::NORM_L2);
cv::normalize(hist2, hist2, 1, 0, cv::NORM_L2);
// Use Bhattacharyya distance
return cv::compareHist(hist1, hist2, cv::HISTCMP_BHATTACHARYYA);
}
// Compute frame difference using optical flow
float computeOpticalFlowMagnitude(const cv::Mat& frame1, const cv::Mat& frame2) {
cv::Mat gray1, gray2, flow;
cv::cvtColor(frame1, gray1, cv::COLOR_BGR2GRAY);
cv::cvtColor(frame2, gray2, cv::COLOR_BGR2GRAY);
// Downscale for faster computation
cv::Mat small1, small2;
int width = static_cast<int>(gray1.cols * downscaleFactor);
int height = static_cast<int>(gray1.rows * downscaleFactor);
cv::resize(gray1, small1, cv::Size(width, height));
cv::resize(gray2, small2, cv::Size(width, height));
// Compute dense optical flow using Farneback method
cv::calcOpticalFlowFarneback(small1, small2, flow, 0.5, 3, 15, 3, 5, 1.2, 0);
// Compute magnitude statistics
std::vector<cv::Mat> channels;
cv::split(flow, channels);
cv::Mat magnitude, angle;
cv::cartToPolar(channels[0], channels[1], magnitude, angle);
// Return mean optical flow magnitude
cv::Scalar mean = cv::mean(magnitude);
return static_cast<float>(mean[0]);
}
// Compute frame difference (MSE-based)
float computeFrameDifference(const cv::Mat& frame1, const cv::Mat& frame2) {
cv::Mat diff;
cv::absdiff(frame1, frame2, diff);
diff.convertTo(diff, CV_32F);
cv::Mat diffSquared = diff.mul(diff);
cv::Scalar meanSquaredError = cv::mean(diffSquared);
// Return root mean squared error
float mse = (meanSquaredError[0] + meanSquaredError[1] + meanSquaredError[2]) / 3.0f;
return std::sqrt(mse);
}
// Detect scenery changes in a video
std::vector<int> detectSceneChanges(const std::string& videoPath,
float combine_histogram_weight = 0.4f,
float combine_optical_weight = 0.6f) {
cv::VideoCapture cap(videoPath);
if (!cap.isOpened()) {
throw std::runtime_error("Failed to open video file: " + videoPath);
}
std::vector<int> changeFrames;
cv::Mat prevFrame, currFrame;
int frameCount = 0;
// Read first frame
if (!cap.read(prevFrame)) {
throw std::runtime_error("Failed to read first frame from video");
}
// Preprocess first frame
cv::Mat processedPrev;
preprocessFrame(prevFrame, processedPrev);
frameCount++;
// Process remaining frames
while (cap.read(currFrame)) {
cv::Mat processedCurr;
preprocessFrame(currFrame, processedCurr);
// Compute multiple metrics
float histDist = computeHistogramDistance(processedPrev, processedCurr);
float opticalMag = computeOpticalFlowMagnitude(processedPrev, processedCurr);
// Normalize optical magnitude (typically 0-255)
float normalizedOptical = std::min(opticalMag / 50.0f, 1.0f);
// Combine scores
float combinedScore = (combine_histogram_weight * histDist) +
(combine_optical_weight * normalizedOptical);
// Detect scene change
if (combinedScore > histogramThreshold) {
changeFrames.push_back(frameCount);
}
processedPrev = processedCurr;
frameCount++;
}
cap.release();
return changeFrames;
}
// Batch process and get statistics
struct SceneChangeInfo {
std::vector<int> frameIndices;
std::vector<double> timestamps;
std::vector<float> scores;
double fps;
int totalFrames;
};
SceneChangeInfo analyzeVideo(const std::string& videoPath,
float combine_histogram_weight = 0.4f,
float combine_optical_weight = 0.6f,
bool returnScores = true) {
cv::VideoCapture cap(videoPath);
if (!cap.isOpened()) {
throw std::runtime_error("Failed to open video file: " + videoPath);
}
SceneChangeInfo info;
info.fps = cap.get(cv::CAP_PROP_FPS);
info.totalFrames = static_cast<int>(cap.get(cv::CAP_PROP_FRAME_COUNT));
cv::Mat prevFrame, currFrame;
int frameCount = 0;
// Read first frame
if (!cap.read(prevFrame)) {
throw std::runtime_error("Failed to read first frame from video");
}
cv::Mat processedPrev;
preprocessFrame(prevFrame, processedPrev);
frameCount++;
// Process remaining frames
while (cap.read(currFrame)) {
cv::Mat processedCurr;
preprocessFrame(currFrame, processedCurr);
float histDist = computeHistogramDistance(processedPrev, processedCurr);
float opticalMag = computeOpticalFlowMagnitude(processedPrev, processedCurr);
float normalizedOptical = std::min(opticalMag / 50.0f, 1.0f);
float combinedScore = (combine_histogram_weight * histDist) +
(combine_optical_weight * normalizedOptical);
if (combinedScore > histogramThreshold) {
info.frameIndices.push_back(frameCount);
info.timestamps.push_back(frameCount / info.fps);
if (returnScores) {
info.scores.push_back(combinedScore);
}
}
processedPrev = processedCurr;
frameCount++;
}
cap.release();
return info;
}
// Setters for parameters
void setHistogramThreshold(float thresh) { histogramThreshold = thresh; }
void setFrameDiffThreshold(float thresh) { frameDiffThreshold = thresh; }
void setBlurKernelSize(int size) {
blurKernelSize = (size % 2 == 0) ? size + 1 : size;
}
void setDownscaleFactor(float factor) { downscaleFactor = factor; }
// Getters
float getHistogramThreshold() const { return histogramThreshold; }
float getFrameDiffThreshold() const { return frameDiffThreshold; }
int getBlurKernelSize() const { return blurKernelSize; }
float getDownscaleFactor() const { return downscaleFactor; }
private:
// Preprocess frame: convert to smaller size, apply blur
void preprocessFrame(const cv::Mat& src, cv::Mat& dst) {
// Resize to smaller dimensions for faster processing
int width = static_cast<int>(src.cols * downscaleFactor);
int height = static_cast<int>(src.rows * downscaleFactor);
cv::Mat resized;
cv::resize(src, resized, cv::Size(width, height));
// Apply Gaussian blur to reduce noise
cv::GaussianBlur(resized, dst, cv::Size(blurKernelSize, blurKernelSize), 0);
}
};
#!/usr/bin/env python3
"""Quick test of scenic detection on a short video."""
import os
import sys
from pathlib import Path
# macOS only: prepend Homebrew LLVM's libc++ directory to DYLD_LIBRARY_PATH
# so the compiled extension can locate its C++ runtime at import time.
if sys.platform == 'darwin':
    _llvm_cxx_dir = '/opt/homebrew/opt/llvm/lib/c++'
    if Path(_llvm_cxx_dir).exists():
        _dyld_path = os.environ.get('DYLD_LIBRARY_PATH', '')
        if _llvm_cxx_dir not in _dyld_path:
            os.environ['DYLD_LIBRARY_PATH'] = ':'.join(
                part for part in (_llvm_cxx_dir, _dyld_path) if part
            )
from workshop.scenic_detection import analyze_video_file
import cv2
def main():
    """Run scene-change detection on the video given as argv[1].

    Prints basic video metadata, analyzes the file with fast settings,
    and lists every detected scene change.

    Returns:
        0 on success, 1 on a usage error or unreadable input.
    """
    if len(sys.argv) < 2:
        print("Usage: python test_short_video.py <video_file>")
        return 1
    video_path = sys.argv[1]
    if not Path(video_path).exists():
        print(f"Error: Video file not found: {video_path}")
        return 1

    # Probe basic metadata with OpenCV before running the slower analyzer.
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video: {video_path}")
        return 1
    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    duration = total_frames / fps if fps > 0 else 0  # guard fps == 0
    cap.release()

    print(f"Video: {Path(video_path).name}")
    print(f"Resolution: {width}x{height}")
    print(f"FPS: {fps:.2f}")
    print(f"Duration: {duration:.1f}s ({total_frames} frames)")
    print()
    print("Analyzing with fast settings (downscale=0.25)...")

    # Imported lazily so extension loading happens only after the cheap
    # argument/metadata checks above have passed.
    from workshop.scenic_detection import SceneryChangeAnalyzer
    analyzer = SceneryChangeAnalyzer(
        histogram_threshold=0.12,
        downscale_factor=0.25  # Fast for short videos
    )
    info = analyzer.detect_changes(
        video_path,
        histogram_weight=0.4,
        optical_weight=0.6
    )

    # Fixed: dropped pointless f-prefixes on strings with no placeholders.
    print("\n✓ Analysis complete!")
    print(f"✓ Detected {len(info.frame_indices)} scene changes")
    if len(info.frame_indices) > 0:
        print("\nScene changes:")
        for i, (frame, timestamp, score) in enumerate(
            zip(info.frame_indices, info.timestamps, info.scores), 1
        ):
            print(f" {i}. Frame {frame:6,} @ {timestamp:6.2f}s (score: {score:.3f})")
    else:
        print("\n No scene changes detected (video may be static or very smooth)")
    return 0
# Script entry point: propagate main()'s return code as the exit status.
if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment