Created
October 19, 2021 06:57
-
-
Save wastee/d6b56b7c06fe08deca0401296498a17d to your computer and use it in GitHub Desktop.
使用 FFmpeg 和 Python,根据视频内容来找出重复的视频
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "40f15af6", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2021-10-19T06:56:22.127623Z", | |
"start_time": "2021-10-19T06:56:22.086793Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"import ffmpeg\n", | |
"from diffimg import diff\n", | |
"from glob import glob\n", | |
"from IPython.display import Image" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "46c97367", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2021-10-19T06:56:22.131302Z", | |
"start_time": "2021-10-19T06:56:22.129020Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"video_path = '/home/tee/temp/工作/视频2/*.mp4'\n", | |
"img_path = '/home/tee/temp/工作/去重截图/'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "d522b039", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2021-10-19T06:56:22.135920Z", | |
"start_time": "2021-10-19T06:56:22.132984Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"mp4s = glob(video_path)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "0febcac9", | |
"metadata": {}, | |
"source": [ | |
"# 文件名去重检查" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "153e85d1", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2021-10-19T06:56:22.141599Z", | |
"start_time": "2021-10-19T06:56:22.137799Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"mp4s_name = [mp4.split('/')[-1] for mp4 in mp4s]\n", | |
"\n", | |
"assert set([x for x in mp4s_name if mp4s_name.count(x) > 1]) == set()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "205e3ff6", | |
"metadata": {}, | |
"source": [ | |
"# 取样截图" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "ba3cf54a", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2021-10-19T06:56:22.146402Z", | |
"start_time": "2021-10-19T06:56:22.142494Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"def save_img(q, q_name):\n", | |
" out, _ = (\n", | |
" ffmpeg\n", | |
" .input(mp4, ss=q)\n", | |
" .filter('select', 'gte(n,{})'.format(1))\n", | |
" .output('pipe:', vframes=1, format='image2', vcodec='mjpeg')\n", | |
" .global_args('-loglevel', 'error')\n", | |
" .global_args('-y')\n", | |
" .run(capture_stdout=True)\n", | |
" )\n", | |
"\n", | |
" with open(img_path + mp4.split('/')[-1][:-4] + f'_{q_name}.jpg', 'wb') as f:\n", | |
" f.write(out)\n", | |
" return out" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "27166435", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2021-10-19T06:56:28.653726Z", | |
"start_time": "2021-10-19T06:56:22.147624Z" | |
}, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[处理中] ... /home/tee/temp/工作/视频2/横版_5.mp4\n", | |
"[处理中] ... /home/tee/temp/工作/视频2/横版_1.mp4\n", | |
"[处理中] ... /home/tee/temp/工作/视频2/横版_2.mp4\n", | |
"[处理中] ... /home/tee/temp/工作/视频2/横版_3.mp4\n", | |
"[处理中] ... /home/tee/temp/工作/视频2/横版_7.mp4\n", | |
"[处理中] ... /home/tee/temp/工作/视频2/横版_4.mp4\n", | |
"[处理中] ... /home/tee/temp/工作/视频2/横版_6.mp4\n" | |
] | |
} | |
], | |
"source": [ | |
"for mp4 in mp4s:\n", | |
" \n", | |
" print(f'[处理中] ... {mp4}')\n", | |
" \n", | |
" duration = float(ffmpeg.probe(mp4)['streams'][0]['duration'])\n", | |
" q_add_on = duration / 5\n", | |
"\n", | |
" q1 = q_add_on\n", | |
" q2 = q1 + q_add_on\n", | |
" q3 = q2 + q_add_on\n", | |
" q4 = q3 + q_add_on\n", | |
"\n", | |
" save_img(q1, 'q1')\n", | |
" save_img(q2, 'q2')\n", | |
" save_img(q3, 'q3')\n", | |
" save_img(q4, 'q4')\n", | |
" \n", | |
"# break" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "1c5ce132", | |
"metadata": {}, | |
"source": [ | |
"# diff 图片" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "e36aaac8", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2021-10-19T06:56:28.658794Z", | |
"start_time": "2021-10-19T06:56:28.655072Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"q1_list = glob(f'{img_path}*_q1.jpg')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "f7d2bca2", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2021-10-19T06:56:28.671874Z", | |
"start_time": "2021-10-19T06:56:28.661737Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"def diff_list(q1_list, ori_str, dst_str):\n", | |
" q2_list = []\n", | |
" for q1 in q1_list[1:]:\n", | |
" print(f'[检查 {ori_str} 中] ... {q1}')\n", | |
" if diff(q1_list[0], q1) < 0.1:\n", | |
" q2_list.append(q1)\n", | |
" q2_list = [q1_list[0]] + q2_list\n", | |
" q2_list = [q2.replace(ori_str, dst_str) for q2 in q2_list]\n", | |
" \n", | |
" return q2_list" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "bf615c4b", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2021-10-19T06:56:30.071298Z", | |
"start_time": "2021-10-19T06:56:28.673494Z" | |
}, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[检查 _q1.jpg 中] ... /home/tee/temp/工作/去重截图/横版_5_q1.jpg\n", | |
"[检查 _q1.jpg 中] ... /home/tee/temp/工作/去重截图/横版_4_q1.jpg\n", | |
"[检查 _q1.jpg 中] ... /home/tee/temp/工作/去重截图/横版_7_q1.jpg\n", | |
"[检查 _q1.jpg 中] ... /home/tee/temp/工作/去重截图/横版_3_q1.jpg\n", | |
"[检查 _q1.jpg 中] ... /home/tee/temp/工作/去重截图/横版_1_q1.jpg\n", | |
"[检查 _q1.jpg 中] ... /home/tee/temp/工作/去重截图/横版_6_q1.jpg\n" | |
] | |
} | |
], | |
"source": [ | |
"q2_list = diff_list(q1_list, '_q1.jpg', '_q2.jpg')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"id": "bb3f51eb", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2021-10-19T06:56:30.153994Z", | |
"start_time": "2021-10-19T06:56:30.073801Z" | |
}, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[检查 _q2.jpg 中] ... /home/tee/temp/工作/去重截图/横版_7_q2.jpg\n", | |
"[检查 _q2.jpg 中] ... /home/tee/temp/工作/去重截图/横版_6_q2.jpg\n" | |
] | |
} | |
], | |
"source": [ | |
"q3_list = diff_list(q2_list, '_q2.jpg', '_q3.jpg')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"id": "78121dec", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2021-10-19T06:56:30.248378Z", | |
"start_time": "2021-10-19T06:56:30.155230Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[检查 _q3.jpg 中] ... /home/tee/temp/工作/去重截图/横版_7_q3.jpg\n", | |
"[检查 _q3.jpg 中] ... /home/tee/temp/工作/去重截图/横版_6_q3.jpg\n" | |
] | |
} | |
], | |
"source": [ | |
"q4_list = diff_list(q3_list, '_q3.jpg', '_q4.jpg')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "3a2698ce", | |
"metadata": {}, | |
"source": [ | |
"# diff 结果" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"id": "89d50aee", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2021-10-19T06:56:30.253653Z", | |
"start_time": "2021-10-19T06:56:30.249814Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"有重复视频\n", | |
"\n", | |
"['横版_2', '横版_7', '横版_6']\n" | |
] | |
} | |
], | |
"source": [ | |
"if q4_list == []:\n", | |
" print('无重复视频')\n", | |
"else:\n", | |
" duplicate_video = [v.split('/')[-1][:-7] for v in q4_list]\n", | |
" print('有重复视频')\n", | |
" print()\n", | |
" print(duplicate_video)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "e9542932", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.8" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment