Skip to content

Instantly share code, notes, and snippets.

@thetonus
Last active July 30, 2020 16:14
Show Gist options
  • Save thetonus/0a0c77a4d472ed90d158b502fe388166 to your computer and use it in GitHub Desktop.
Save thetonus/0a0c77a4d472ed90d158b502fe388166 to your computer and use it in GitHub Desktop.
Detect whether a video's audio track contains sound above a given loudness threshold
import logging
import re
import subprocess
import sys
import ffmpeg
logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger(__file__)
logger.setLevel(logging.INFO)

DEFAULT_DURATION = 0.3  # Units: seconds
DEFAULT_THRESHOLD = -36  # Units: decibels

# Pre-compiled patterns for scanning ffmpeg's log output line by line.
# Raw strings keep the `\.` escapes intact for the regex engine without
# triggering Python's "invalid escape sequence" warning.
# Matches a silence start timestamp at the very end of a line.
SILENCE_START_RE = re.compile(r" silence_start: (?P<start>[0-9]+(\.?[0-9]*))$")
# Matches a silence end timestamp that is followed by a space.
SILENCE_END_RE = re.compile(r" silence_end: (?P<end>[0-9]+(\.?[0-9]*)) ")
# Matches the `size=... time=HH:MM:SS.ss bitrate=` summary and captures the
# hours, minutes and (five-character) seconds fields separately.
TOTAL_DURATION_RE = re.compile(
    r"size=[^ ]+ time=(?P<hours>[0-9]{2}):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9\.]{5}) bitrate="
)
def _logged_popen(cmd_line, *args, **kwargs):
    """Log the command line at debug level, then start it with ``subprocess.Popen``.

    Extra positional and keyword arguments are forwarded to ``Popen`` unchanged;
    the resulting ``Popen`` object is returned.
    """
    rendered = subprocess.list2cmdline(cmd_line)
    logger.debug("Running command: {}".format(rendered))
    process = subprocess.Popen(cmd_line, *args, **kwargs)
    return process
def detect_silence(input_file, silence_threshold, silence_duration):
    """Run ffmpeg's ``silencedetect`` filter on *input_file* and return its log text.

    Args:
        input_file: Input passed to ``ffmpeg.input``.
        silence_threshold: Threshold value; sent to the filter's ``n`` option
            formatted as ``"<value>dB"``.
        silence_duration: Value sent to the filter's ``d`` option.

    Returns:
        Everything ffmpeg wrote to stderr, decoded as text.

    If ffmpeg exits with a non-zero status, the captured output is logged as
    an error and the process exits with status 1.
    """
    cmd_line = (
        ffmpeg.input(input_file)
        .filter("silencedetect", n="{}dB".format(silence_threshold), d=silence_duration)
        .output("-", format="null")
        .compile()
    ) + ["-nostats"]
    p = _logged_popen(cmd_line, stderr=subprocess.PIPE)

    # The log may contain bytes that are not valid UTF-8; replace them instead
    # of raising UnicodeDecodeError. The lines parsed downstream are plain
    # ASCII, so the substitution does not affect them.
    output = p.communicate()[1].decode("utf-8", errors="replace")

    if p.returncode != 0:
        logger.error(output)
        sys.exit(1)

    logger.debug(output)
    return output
def has_audio(
    input_file="", silence_threshold=DEFAULT_THRESHOLD, silence_duration=DEFAULT_DURATION,
):
    """Check whether a clip contains any non-silent audio.

    Runs ``detect_silence`` on *input_file* and converts the reported silence
    boundaries into non-silent chunks: a chunk starts where a silence ends and
    ends where the next silence starts.

    Args:
        input_file: Input forwarded to ``detect_silence``.
        silence_threshold: Threshold forwarded to ``detect_silence``.
        silence_duration: Duration forwarded to ``detect_silence``.

    Returns:
        A ``(chunks, is_silence)`` tuple. ``chunks`` is a list of
        ``(start, end)`` timestamp pairs for the non-silent stretches, and
        ``is_silence`` is True only when none of those chunks has a positive
        length, i.e. the whole input was silent.
    """
    # Call ffmpeg and detect silence in file
    output = detect_silence(input_file, silence_threshold, silence_duration)

    start_time, end_time = 0.0, None

    # Chunks start when silence ends, and chunks end when silence starts.
    chunk_starts = []
    chunk_ends = []
    for line in output.splitlines():
        silence_start_match = SILENCE_START_RE.search(line)
        silence_end_match = SILENCE_END_RE.search(line)
        total_duration_match = TOTAL_DURATION_RE.search(line)
        if silence_start_match:
            chunk_ends.append(float(silence_start_match.group("start")))
            if not chunk_starts:
                # Started with non-silence.
                chunk_starts.append(start_time)
        elif silence_end_match:
            chunk_starts.append(float(silence_end_match.group("end")))
        elif total_duration_match:
            hours = int(total_duration_match.group("hours"))
            minutes = int(total_duration_match.group("minutes"))
            seconds = float(total_duration_match.group("seconds"))
            end_time = hours * 3600 + minutes * 60 + seconds

    if not chunk_starts:
        # No silence found: the whole input is a single non-silent chunk.
        chunk_starts.append(start_time)

    if len(chunk_starts) > len(chunk_ends):
        # Finished with non-silence: close the last chunk at the total
        # duration, or at a large sentinel if no duration line was seen.
        chunk_ends.append(end_time or 10000000.0)

    chunks = list(zip(chunk_starts, chunk_ends))

    # The input is all silence only if no chunk actually spans any time.
    # Zero-length chunks such as (0.0, 0.0) arise when silence begins at the
    # very start, so they must not count as audio.
    is_silence = not any(end > start for start, end in chunks)
    return chunks, is_silence
def main(
    input_file="", silence_threshold=DEFAULT_THRESHOLD, silence_duration=DEFAULT_DURATION,
):
    """Log the detection settings, run ``has_audio``, and log whether audio was found."""
    logger.info(f"Audio settings: silence_threshold={silence_threshold}, silence_duration={silence_duration}")

    # Only the silence flag is needed here; the chunk list is discarded.
    silent = has_audio(input_file, silence_threshold, silence_duration)[1]

    if silent:
        verdict = "No audio found..."
    else:
        verdict = "Audio found."
    logger.info(verdict)
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and hand them to main().
    from argparse import ArgumentParser

    parser = ArgumentParser(description="Split media into separate chunks wherever silence occurs")
    parser.add_argument("input_file", help="Input filename (`-` for stdin)")
    parser.add_argument("--silence-threshold", default=DEFAULT_THRESHOLD, type=int, help="Silence threshold (in dB)")
    parser.add_argument("--silence-duration", default=DEFAULT_DURATION, type=float, help="Silence duration")
    parser.add_argument("-v", dest="verbose", action="store_true", help="Verbose mode")
    kwargs = vars(parser.parse_args())

    # "verbose" is not a main() parameter, so remove it before forwarding.
    if kwargs.pop("verbose"):
        # The root logger was already configured when this module loaded, so a
        # second logging.basicConfig() call would be silently ignored. Apply
        # the verbose format to the existing root handlers instead.
        verbose_formatter = logging.Formatter("%(levelname)s: %(message)s")
        for handler in logging.getLogger().handlers:
            handler.setFormatter(verbose_formatter)
        logger.setLevel(logging.DEBUG)

    main(**kwargs)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment