Last active
July 30, 2020 16:14
-
-
Save thetonus/0a0c77a4d472ed90d158b502fe388166 to your computer and use it in GitHub Desktop.
Get audio from video where audio levels are above a certain level
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging | |
import re | |
import subprocess | |
import sys | |
import ffmpeg | |
# Configure the root logger once at import time; messages are printed bare.
logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger(__file__)
logger.setLevel(logging.INFO)

DEFAULT_DURATION = 0.3  # Units: seconds
DEFAULT_THRESHOLD = -36  # Units: decibels

# Compiled regexes for searching ffmpeg's log output.
# Raw strings keep the `\.` escapes from being read as (invalid) Python
# string escapes; the pattern text itself is unchanged.
SILENCE_START_RE = re.compile(r" silence_start: (?P<start>[0-9]+(\.?[0-9]*))$")
SILENCE_END_RE = re.compile(r" silence_end: (?P<end>[0-9]+(\.?[0-9]*)) ")
TOTAL_DURATION_RE = re.compile(
    r"size=[^ ]+ time=(?P<hours>[0-9]{2}):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9\.]{5}) bitrate="
)
def _logged_popen(cmd_line, *args, **kwargs):
    """Log a command line at debug level, then launch it.

    Args:
        cmd_line: Sequence of program arguments to run.
        *args: Extra positional arguments forwarded to ``subprocess.Popen``.
        **kwargs: Extra keyword arguments forwarded to ``subprocess.Popen``.

    Returns:
        The ``subprocess.Popen`` object for the started process.
    """
    rendered = subprocess.list2cmdline(cmd_line)
    logger.debug("Running command: {}".format(rendered))
    process = subprocess.Popen(cmd_line, *args, **kwargs)
    return process
def detect_silence(input_file, silence_threshold, silence_duration):
    """Run ffmpeg's ``silencedetect`` filter on a file and return ffmpeg's log.

    Args:
        input_file: Input passed to ``ffmpeg.input``.
        silence_threshold: Noise threshold; sent to the filter as ``n`` with a
            ``dB`` suffix appended.
        silence_duration: Sent to the filter as ``d``.

    Returns:
        The text ffmpeg wrote to stderr. Bytes that are not valid UTF-8 are
        replaced rather than raising.

    Note:
        If ffmpeg exits with a non-zero status, the captured output is logged
        as an error and the whole process exits with status 1.
    """
    command = (
        ffmpeg.input(input_file)
        .filter("silencedetect", n="{}dB".format(silence_threshold), d=silence_duration)
        .output("-", format="null")
        .compile()
    ) + ["-nostats"]

    process = _logged_popen(command, stderr=subprocess.PIPE)
    _, raw_stderr = process.communicate()

    # ffmpeg's log can echo metadata or file names that are not valid UTF-8;
    # a strict decode would crash before the silence lines are ever parsed.
    output = raw_stderr.decode("utf-8", errors="replace")

    if process.returncode != 0:
        logger.error(output)
        sys.exit(1)

    logger.debug(output)
    return output
def has_audio(
    input_file="", silence_threshold=DEFAULT_THRESHOLD, silence_duration=DEFAULT_DURATION,
):
    """Find the non-silent chunks of a file and report whether any exist.

    The ffmpeg log returned by ``detect_silence`` is scanned line by line:
    a ``silence_start`` line closes the current audible chunk, a
    ``silence_end`` line opens the next one, and the progress line supplies
    the total duration used to close a chunk that is still open at the end.

    Args:
        input_file: Input handed to ``detect_silence``.
        silence_threshold: Threshold handed to ``detect_silence``.
        silence_duration: Duration handed to ``detect_silence``.

    Returns:
        A ``(chunks, is_silence)`` tuple. ``chunks`` is a list of
        ``(start, end)`` float pairs for the audible stretches, and
        ``is_silence`` is True when none of them has a positive length.
    """
    # Call ffmpeg and detect silence in the file.
    output = detect_silence(input_file, silence_threshold, silence_duration)

    start_time, end_time = 0.0, None

    # Chunks start when silence ends, and chunks end when silence starts.
    chunk_starts = []
    chunk_ends = []
    for line in output.splitlines():
        silence_start_match = SILENCE_START_RE.search(line)
        silence_end_match = SILENCE_END_RE.search(line)
        total_duration_match = TOTAL_DURATION_RE.search(line)
        if silence_start_match:
            chunk_ends.append(float(silence_start_match.group("start")))
            if not chunk_starts:
                # Started with non-silence.
                chunk_starts.append(start_time)
        elif silence_end_match:
            chunk_starts.append(float(silence_end_match.group("end")))
        elif total_duration_match:
            hours = int(total_duration_match.group("hours"))
            minutes = int(total_duration_match.group("minutes"))
            seconds = float(total_duration_match.group("seconds"))
            end_time = hours * 3600 + minutes * 60 + seconds

    if not chunk_starts:
        # No silence was reported: the whole input is one audible chunk.
        chunk_starts.append(start_time)

    if len(chunk_starts) > len(chunk_ends):
        # The last chunk is still open, so it runs to the end of the input.
        # Fall back to a very large end only when the duration is unknown;
        # a parsed duration of 0.0 is a real value and must be kept.
        chunk_ends.append(end_time if end_time is not None else 10000000.0)

    chunks = list(zip(chunk_starts, chunk_ends))

    # The input is silent only when no chunk actually spans any time. This
    # covers silence that never stopped (a zero-length leading chunk) without
    # misreporting inputs that contain no silence at all.
    is_silence = not any(end > start for start, end in chunks)
    return chunks, is_silence
def main(
    input_file="", silence_threshold=DEFAULT_THRESHOLD, silence_duration=DEFAULT_DURATION,
):
    """Log the settings in use, then log whether the input contains audio.

    Args:
        input_file: Input handed to ``has_audio``.
        silence_threshold: Threshold handed to ``has_audio``.
        silence_duration: Duration handed to ``has_audio``.
    """
    logger.info(f"Audio settings: silence_threshold={silence_threshold}, silence_duration={silence_duration}")

    # Only the silence flag matters here; the chunk list is discarded.
    _chunks, silent = has_audio(input_file, silence_threshold, silence_duration)
    logger.info("No audio found..." if silent else "Audio found.")
if __name__ == "__main__":
    # Command-line entry point: parse the options and run the audio check.
    from argparse import ArgumentParser

    parser = ArgumentParser(description="Report whether a media file contains audio above a silence threshold")
    parser.add_argument("input_file", help="Input filename (`-` for stdin)")
    parser.add_argument("--silence-threshold", default=DEFAULT_THRESHOLD, type=int, help="Silence threshold (in dB)")
    parser.add_argument("--silence-duration", default=DEFAULT_DURATION, type=float, help="Silence duration")
    parser.add_argument("-v", dest="verbose", action="store_true", help="Verbose mode")

    kwargs = vars(parser.parse_args())
    if kwargs.pop("verbose"):
        # The root logger was already configured at import time, so calling
        # logging.basicConfig() again would silently do nothing. Update the
        # existing handlers and levels directly instead.
        root_logger = logging.getLogger()
        verbose_formatter = logging.Formatter("%(levelname)s: %(message)s")
        for handler in root_logger.handlers:
            handler.setFormatter(verbose_formatter)
        root_logger.setLevel(logging.DEBUG)
        logger.setLevel(logging.DEBUG)

    # Remaining keys match main()'s parameter names.
    main(**kwargs)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ffmpeg-python |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment