Created
February 1, 2026 17:15
-
-
Save huangsam/f3398c03eb2ea4ce06fb07e2e456b132 to your computer and use it in GitHub Desktop.
Scenery detection
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <pybind11/stl.h>

#include <cstdint>
#include <string>

#include "scenery_detector.cpp"
| namespace py = pybind11; | |
| PYBIND11_MODULE(_scenic_detection, m) { | |
| m.doc() = "Scenic change detection for videos using OpenCV and optical flow"; | |
| // Binding for SceneChangeInfo struct | |
| py::class_<SceneryChangeDetector::SceneChangeInfo>(m, "SceneChangeInfo") | |
| .def(py::init<>()) | |
| .def_readwrite("frame_indices", &SceneryChangeDetector::SceneChangeInfo::frameIndices) | |
| .def_readwrite("timestamps", &SceneryChangeDetector::SceneChangeInfo::timestamps) | |
| .def_readwrite("scores", &SceneryChangeDetector::SceneChangeInfo::scores) | |
| .def_readwrite("fps", &SceneryChangeDetector::SceneChangeInfo::fps) | |
| .def_readwrite("total_frames", &SceneryChangeDetector::SceneChangeInfo::totalFrames) | |
| .def("__repr__", [](const SceneryChangeDetector::SceneChangeInfo& self) { | |
| return "<SceneChangeInfo frames=" + std::to_string(self.frameIndices.size()) + | |
| " fps=" + std::to_string(self.fps) + ">"; | |
| }); | |
| // Binding for SceneryChangeDetector class | |
| py::class_<SceneryChangeDetector>(m, "SceneryChangeDetector") | |
| .def(py::init<float, float, int, float>(), | |
| py::arg("histogram_threshold") = 0.15f, | |
| py::arg("frame_diff_threshold") = 20.0f, | |
| py::arg("blur_kernel") = 5, | |
| py::arg("downscale_factor") = 0.5f, | |
| "Initialize the scenery change detector with tuning parameters") | |
| .def("detect_scene_changes", &SceneryChangeDetector::detectSceneChanges, | |
| py::arg("video_path"), | |
| py::arg("histogram_weight") = 0.4f, | |
| py::arg("optical_weight") = 0.6f, | |
| "Detect frame indices where scenery changes occur\n\n" | |
| "Args:\n" | |
| " video_path: Path to the video file\n" | |
| " histogram_weight: Weight for histogram distance (0.0-1.0)\n" | |
| " optical_weight: Weight for optical flow magnitude (0.0-1.0)\n" | |
| "Returns:\n" | |
| " List of frame indices where changes detected") | |
| .def("analyze_video", &SceneryChangeDetector::analyzeVideo, | |
| py::arg("video_path"), | |
| py::arg("histogram_weight") = 0.4f, | |
| py::arg("optical_weight") = 0.6f, | |
| py::arg("return_scores") = true, | |
| "Analyze video and return detailed change information\n\n" | |
| "Args:\n" | |
| " video_path: Path to the video file\n" | |
| " histogram_weight: Weight for histogram distance\n" | |
| " optical_weight: Weight for optical flow magnitude\n" | |
| " return_scores: Whether to compute change scores\n" | |
| "Returns:\n" | |
| " SceneChangeInfo object with frames, timestamps, and fps") | |
| .def("compute_histogram_distance", &SceneryChangeDetector::computeHistogramDistance, | |
| py::arg("frame1"), | |
| py::arg("frame2"), | |
| "Compute Bhattacharyya distance between two frames\n\n" | |
| "Args:\n" | |
| " frame1: First frame as numpy array (BGR)\n" | |
| " frame2: Second frame as numpy array (BGR)\n" | |
| "Returns:\n" | |
| " Distance score (0.0 = identical, ~1.0 = very different)") | |
| .def("compute_optical_flow_magnitude", &SceneryChangeDetector::computeOpticalFlowMagnitude, | |
| py::arg("frame1"), | |
| py::arg("frame2"), | |
| "Compute mean optical flow magnitude between two frames\n\n" | |
| "Args:\n" | |
| " frame1: First frame as numpy array (BGR)\n" | |
| " frame2: Second frame as numpy array (BGR)\n" | |
| "Returns:\n" | |
| " Mean magnitude of optical flow") | |
| .def("compute_frame_difference", &SceneryChangeDetector::computeFrameDifference, | |
| py::arg("frame1"), | |
| py::arg("frame2"), | |
| "Compute RMSE-based frame difference\n\n" | |
| "Args:\n" | |
| " frame1: First frame as numpy array (BGR)\n" | |
| " frame2: Second frame as numpy array (BGR)\n" | |
| "Returns:\n" | |
| " Root mean squared error between frames") | |
| .def("set_histogram_threshold", &SceneryChangeDetector::setHistogramThreshold) | |
| .def("set_frame_diff_threshold", &SceneryChangeDetector::setFrameDiffThreshold) | |
| .def("set_blur_kernel_size", &SceneryChangeDetector::setBlurKernelSize) | |
| .def("set_downscale_factor", &SceneryChangeDetector::setDownscaleFactor) | |
| .def("get_histogram_threshold", &SceneryChangeDetector::getHistogramThreshold) | |
| .def("get_frame_diff_threshold", &SceneryChangeDetector::getFrameDiffThreshold) | |
| .def("get_blur_kernel_size", &SceneryChangeDetector::getBlurKernelSize) | |
| .def("get_downscale_factor", &SceneryChangeDetector::getDownscaleFactor); | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Wrapper script for scenic detection that sets up proper library paths
# and runs test_short_video.py through uv from the script's own directory.

# Set DYLD_LIBRARY_PATH for Homebrew LLVM on macOS, keeping any existing value
# after the new entry.
if [[ "$OSTYPE" == "darwin"* ]]; then
    export DYLD_LIBRARY_PATH="/opt/homebrew/opt/llvm/lib/c++${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"
fi

# Get the directory where this script is located
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# The working directory changes below, so make relative arguments that name an
# existing file absolute first; otherwise a video path given relative to the
# caller's directory would no longer resolve.
args=()
for arg in "$@"; do
    if [[ "$arg" != /* && -e "$arg" ]]; then
        arg="$PWD/$arg"
    fi
    args+=("$arg")
done

# Stop instead of running uv from the wrong directory if the cd fails.
cd "$SCRIPT_DIR" || exit 1

# Run with uv, suppressing dyld and objc warnings on stderr
exec uv run python test_short_video.py "${args[@]}" 2> >(grep -Ev '^(dyld|objc)\[' >&2)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <opencv2/opencv.hpp> | |
| #include <opencv2/video/tracking.hpp> | |
| #include <vector> | |
| #include <cmath> | |
| #include <algorithm> | |
| class SceneryChangeDetector { | |
| private: | |
| // Configuration parameters | |
| float histogramThreshold; | |
| float frameDiffThreshold; | |
| int blurKernelSize; | |
| float downscaleFactor; | |
| public: | |
| SceneryChangeDetector(float hist_thresh = 0.15f, | |
| float frame_diff_thresh = 20.0f, | |
| int blur_kernel = 5, | |
| float downscale = 0.5f) | |
| : histogramThreshold(hist_thresh), | |
| frameDiffThreshold(frame_diff_thresh), | |
| blurKernelSize(blur_kernel), | |
| downscaleFactor(downscale) { | |
| // Ensure blur kernel is odd | |
| if (blurKernelSize % 2 == 0) { | |
| blurKernelSize++; | |
| } | |
| } | |
| // Compute histogram comparison score between two frames | |
| float computeHistogramDistance(const cv::Mat& frame1, const cv::Mat& frame2) { | |
| cv::Mat hsv1, hsv2; | |
| cv::cvtColor(frame1, hsv1, cv::COLOR_BGR2HSV); | |
| cv::cvtColor(frame2, hsv2, cv::COLOR_BGR2HSV); | |
| // Compute histograms | |
| int hbins = 32, sbins = 32; | |
| int histSize[] = { hbins, sbins }; | |
| float hrange[] = { 0, 180 }; | |
| float srange[] = { 0, 256 }; | |
| const float* ranges[] = { hrange, srange }; | |
| int channels[] = { 0, 1 }; | |
| cv::Mat hist1, hist2; | |
| cv::calcHist(&hsv1, 1, channels, cv::Mat(), hist1, 2, histSize, ranges); | |
| cv::calcHist(&hsv2, 1, channels, cv::Mat(), hist2, 2, histSize, ranges); | |
| cv::normalize(hist1, hist1, 1, 0, cv::NORM_L2); | |
| cv::normalize(hist2, hist2, 1, 0, cv::NORM_L2); | |
| // Use Bhattacharyya distance | |
| return cv::compareHist(hist1, hist2, cv::HISTCMP_BHATTACHARYYA); | |
| } | |
| // Compute frame difference using optical flow | |
| float computeOpticalFlowMagnitude(const cv::Mat& frame1, const cv::Mat& frame2) { | |
| cv::Mat gray1, gray2, flow; | |
| cv::cvtColor(frame1, gray1, cv::COLOR_BGR2GRAY); | |
| cv::cvtColor(frame2, gray2, cv::COLOR_BGR2GRAY); | |
| // Downscale for faster computation | |
| cv::Mat small1, small2; | |
| int width = static_cast<int>(gray1.cols * downscaleFactor); | |
| int height = static_cast<int>(gray1.rows * downscaleFactor); | |
| cv::resize(gray1, small1, cv::Size(width, height)); | |
| cv::resize(gray2, small2, cv::Size(width, height)); | |
| // Compute dense optical flow using Farneback method | |
| cv::calcOpticalFlowFarneback(small1, small2, flow, 0.5, 3, 15, 3, 5, 1.2, 0); | |
| // Compute magnitude statistics | |
| std::vector<cv::Mat> channels; | |
| cv::split(flow, channels); | |
| cv::Mat magnitude, angle; | |
| cv::cartToPolar(channels[0], channels[1], magnitude, angle); | |
| // Return mean optical flow magnitude | |
| cv::Scalar mean = cv::mean(magnitude); | |
| return static_cast<float>(mean[0]); | |
| } | |
| // Compute frame difference (MSE-based) | |
| float computeFrameDifference(const cv::Mat& frame1, const cv::Mat& frame2) { | |
| cv::Mat diff; | |
| cv::absdiff(frame1, frame2, diff); | |
| diff.convertTo(diff, CV_32F); | |
| cv::Mat diffSquared = diff.mul(diff); | |
| cv::Scalar meanSquaredError = cv::mean(diffSquared); | |
| // Return root mean squared error | |
| float mse = (meanSquaredError[0] + meanSquaredError[1] + meanSquaredError[2]) / 3.0f; | |
| return std::sqrt(mse); | |
| } | |
| // Detect scenery changes in a video | |
| std::vector<int> detectSceneChanges(const std::string& videoPath, | |
| float combine_histogram_weight = 0.4f, | |
| float combine_optical_weight = 0.6f) { | |
| cv::VideoCapture cap(videoPath); | |
| if (!cap.isOpened()) { | |
| throw std::runtime_error("Failed to open video file: " + videoPath); | |
| } | |
| std::vector<int> changeFrames; | |
| cv::Mat prevFrame, currFrame; | |
| int frameCount = 0; | |
| // Read first frame | |
| if (!cap.read(prevFrame)) { | |
| throw std::runtime_error("Failed to read first frame from video"); | |
| } | |
| // Preprocess first frame | |
| cv::Mat processedPrev; | |
| preprocessFrame(prevFrame, processedPrev); | |
| frameCount++; | |
| // Process remaining frames | |
| while (cap.read(currFrame)) { | |
| cv::Mat processedCurr; | |
| preprocessFrame(currFrame, processedCurr); | |
| // Compute multiple metrics | |
| float histDist = computeHistogramDistance(processedPrev, processedCurr); | |
| float opticalMag = computeOpticalFlowMagnitude(processedPrev, processedCurr); | |
| // Normalize optical magnitude (typically 0-255) | |
| float normalizedOptical = std::min(opticalMag / 50.0f, 1.0f); | |
| // Combine scores | |
| float combinedScore = (combine_histogram_weight * histDist) + | |
| (combine_optical_weight * normalizedOptical); | |
| // Detect scene change | |
| if (combinedScore > histogramThreshold) { | |
| changeFrames.push_back(frameCount); | |
| } | |
| processedPrev = processedCurr; | |
| frameCount++; | |
| } | |
| cap.release(); | |
| return changeFrames; | |
| } | |
| // Batch process and get statistics | |
| struct SceneChangeInfo { | |
| std::vector<int> frameIndices; | |
| std::vector<double> timestamps; | |
| std::vector<float> scores; | |
| double fps; | |
| int totalFrames; | |
| }; | |
| SceneChangeInfo analyzeVideo(const std::string& videoPath, | |
| float combine_histogram_weight = 0.4f, | |
| float combine_optical_weight = 0.6f, | |
| bool returnScores = true) { | |
| cv::VideoCapture cap(videoPath); | |
| if (!cap.isOpened()) { | |
| throw std::runtime_error("Failed to open video file: " + videoPath); | |
| } | |
| SceneChangeInfo info; | |
| info.fps = cap.get(cv::CAP_PROP_FPS); | |
| info.totalFrames = static_cast<int>(cap.get(cv::CAP_PROP_FRAME_COUNT)); | |
| cv::Mat prevFrame, currFrame; | |
| int frameCount = 0; | |
| // Read first frame | |
| if (!cap.read(prevFrame)) { | |
| throw std::runtime_error("Failed to read first frame from video"); | |
| } | |
| cv::Mat processedPrev; | |
| preprocessFrame(prevFrame, processedPrev); | |
| frameCount++; | |
| // Process remaining frames | |
| while (cap.read(currFrame)) { | |
| cv::Mat processedCurr; | |
| preprocessFrame(currFrame, processedCurr); | |
| float histDist = computeHistogramDistance(processedPrev, processedCurr); | |
| float opticalMag = computeOpticalFlowMagnitude(processedPrev, processedCurr); | |
| float normalizedOptical = std::min(opticalMag / 50.0f, 1.0f); | |
| float combinedScore = (combine_histogram_weight * histDist) + | |
| (combine_optical_weight * normalizedOptical); | |
| if (combinedScore > histogramThreshold) { | |
| info.frameIndices.push_back(frameCount); | |
| info.timestamps.push_back(frameCount / info.fps); | |
| if (returnScores) { | |
| info.scores.push_back(combinedScore); | |
| } | |
| } | |
| processedPrev = processedCurr; | |
| frameCount++; | |
| } | |
| cap.release(); | |
| return info; | |
| } | |
| // Setters for parameters | |
| void setHistogramThreshold(float thresh) { histogramThreshold = thresh; } | |
| void setFrameDiffThreshold(float thresh) { frameDiffThreshold = thresh; } | |
| void setBlurKernelSize(int size) { | |
| blurKernelSize = (size % 2 == 0) ? size + 1 : size; | |
| } | |
| void setDownscaleFactor(float factor) { downscaleFactor = factor; } | |
| // Getters | |
| float getHistogramThreshold() const { return histogramThreshold; } | |
| float getFrameDiffThreshold() const { return frameDiffThreshold; } | |
| int getBlurKernelSize() const { return blurKernelSize; } | |
| float getDownscaleFactor() const { return downscaleFactor; } | |
| private: | |
| // Preprocess frame: convert to smaller size, apply blur | |
| void preprocessFrame(const cv::Mat& src, cv::Mat& dst) { | |
| // Resize to smaller dimensions for faster processing | |
| int width = static_cast<int>(src.cols * downscaleFactor); | |
| int height = static_cast<int>(src.rows * downscaleFactor); | |
| cv::Mat resized; | |
| cv::resize(src, resized, cv::Size(width, height)); | |
| // Apply Gaussian blur to reduce noise | |
| cv::GaussianBlur(resized, dst, cv::Size(blurKernelSize, blurKernelSize), 0); | |
| } | |
| }; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """Quick test of scenic detection on a short video.""" | |
| import os | |
| import sys | |
| from pathlib import Path | |
def prepend_library_path(current: str, lib_path: str) -> str:
    """Return the colon-separated search path `current` with `lib_path` first.

    `current` is returned unchanged when `lib_path` is already one of its
    entries. Entries are compared whole, so a longer path that merely contains
    `lib_path` as a substring does not count as present. Empty entries are
    dropped when a new value is built.
    """
    entries = [entry for entry in current.split(':') if entry]
    if lib_path in entries:
        return current
    return ':'.join([lib_path, *entries])


# Set DYLD_LIBRARY_PATH for Homebrew LLVM (macOS)
# NOTE(review): dyld presumably reads DYLD_* variables when a process starts,
# so this may only affect child processes and not the import below - confirm.
if sys.platform == 'darwin':
    llvm_lib_path = '/opt/homebrew/opt/llvm/lib/c++'
    if Path(llvm_lib_path).exists():
        os.environ['DYLD_LIBRARY_PATH'] = prepend_library_path(
            os.environ.get('DYLD_LIBRARY_PATH', ''), llvm_lib_path
        )
| from workshop.scenic_detection import analyze_video_file | |
| import cv2 | |
def main():
    """Print basic metadata for a video, run scene-change detection, and list the results.

    The video path is taken from the first command-line argument.

    Returns:
        Exit code: 0 on success; 1 for a missing argument, a missing or
        unreadable video file, or a failed analysis.
    """
    if len(sys.argv) < 2:
        print("Usage: python test_short_video.py <video_file>")
        return 1

    video_path = sys.argv[1]
    if not Path(video_path).exists():
        print(f"Error: Video file not found: {video_path}")
        return 1

    # Get video info
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video: {video_path}")
        return 1
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    finally:
        # Release the capture even if a property query raises.
        cap.release()
    # Guard against containers that report an fps of 0.
    duration = total_frames / fps if fps > 0 else 0

    print(f"Video: {Path(video_path).name}")
    print(f"Resolution: {width}x{height}")
    print(f"FPS: {fps:.2f}")
    print(f"Duration: {duration:.1f}s ({total_frames} frames)")
    print()
    print("Analyzing with fast settings (downscale=0.25)...")

    # Create detector with fast settings
    from workshop.scenic_detection import SceneryChangeAnalyzer
    analyzer = SceneryChangeAnalyzer(
        histogram_threshold=0.12,
        downscale_factor=0.25  # Fast for short videos
    )
    # NOTE(review): assumes analysis failures surface as RuntimeError (the C++
    # detector throws std::runtime_error) - confirm against SceneryChangeAnalyzer.
    try:
        info = analyzer.detect_changes(
            video_path,
            histogram_weight=0.4,
            optical_weight=0.6
        )
    except RuntimeError as exc:
        print(f"Error: Analysis failed: {exc}")
        return 1

    print("\n✓ Analysis complete!")
    print(f"✓ Detected {len(info.frame_indices)} scene changes")

    if len(info.frame_indices) > 0:
        print("\nScene changes:")
        # Scores may be absent; still list every detected change and add the
        # score only when one exists for that entry.
        scores = list(info.scores)
        for i, (frame, timestamp) in enumerate(
            zip(info.frame_indices, info.timestamps), 1
        ):
            line = f" {i}. Frame {frame:6,} @ {timestamp:6.2f}s"
            if i <= len(scores):
                line += f" (score: {scores[i - 1]:.3f})"
            print(line)
    else:
        print("\n No scene changes detected (video may be static or very smooth)")
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment