thetonus · July 30, 2020 16:14
diff --git a/main.py b/main.py
 import logging
 import re
 import subprocess
 import sys

 import ffmpeg

 # Default logging: bare messages at INFO level.
 logging.basicConfig(level=logging.INFO, format="%(message)s")
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)

 DEFAULT_DURATION = 0.3  # Units: seconds
 DEFAULT_THRESHOLD = -36  # Units: decibels

 # Compiled regexes for searching ffmpeg's log output. Raw strings keep the
 # backslash escapes literal instead of relying on Python passing unknown
 # string escapes through (which newer interpreters warn about).
 SILENCE_START_RE = re.compile(r" silence_start: (?P<start>[0-9]+(\.?[0-9]*))$")
 SILENCE_END_RE = re.compile(r" silence_end: (?P<end>[0-9]+(\.?[0-9]*)) ")
 TOTAL_DURATION_RE = re.compile(
    r"size=[^ ]+ time=(?P<hours>[0-9]{2}):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9\.]{5}) bitrate="
 )


 def _logged_popen(cmd_line, *args, **kwargs):
    """Log a command line at DEBUG level, then launch it with ``subprocess.Popen``.

    ``cmd_line`` is the argument list; any further positional and keyword
    arguments are forwarded to ``subprocess.Popen`` untouched. The started
    ``Popen`` object is returned.
    """
    rendered = subprocess.list2cmdline(cmd_line)
    logger.debug("Running command: {}".format(rendered))
    process = subprocess.Popen(cmd_line, *args, **kwargs)
    return process


 def detect_silence(input_file, silence_threshold, silence_duration):
    """Run ffmpeg's ``silencedetect`` filter over a file and return its log text.

    Args:
        input_file: Input handed to ``ffmpeg.input``.
        silence_threshold: Threshold in dB, passed as the filter's ``n`` option.
        silence_duration: Duration passed as the filter's ``d`` option.

    Returns:
        Everything ffmpeg wrote to stderr, decoded to ``str``.

    If ffmpeg exits with a non-zero status, the output is logged as an error
    and the process exits with status 1 (``SystemExit``).
    """
    cmd_line = (
        ffmpeg.input(input_file)
        .filter("silencedetect", n="{}dB".format(silence_threshold), d=silence_duration)
        .output("-", format="null")
        .compile()
    ) + ["-nostats"]
    p = _logged_popen(cmd_line, stderr=subprocess.PIPE)

    # The log may carry bytes that are not valid UTF-8 (e.g. text copied from
    # the media file); replace them rather than crash, since only the ASCII
    # silencedetect lines are parsed downstream.
    output = p.communicate()[1].decode("utf-8", errors="replace")
    if p.returncode != 0:
        logger.error(output)
        sys.exit(1)

    logger.debug(output)
    return output


 def has_audio(
    input_file="", silence_threshold=DEFAULT_THRESHOLD, silence_duration=DEFAULT_DURATION,
 ):
    """Find the non-silent stretches of a clip and report whether it is all silence.

    Runs ``detect_silence`` and pairs up the silence boundaries found in its
    output: a non-silent chunk starts where a silence ends and ends where the
    next silence starts.

    Args:
        input_file: File to analyse.
        silence_threshold: Silence threshold (dB) forwarded to ``detect_silence``.
        silence_duration: Silence duration (seconds) forwarded to ``detect_silence``.

    Returns:
        A ``(chunks, is_silence)`` tuple. ``chunks`` is a list of
        ``(start, end)`` times in seconds for the non-silent stretches.
        ``is_silence`` is True when no chunk has a positive length. Note that
        the flag is the inverse of what the function name suggests.
    """
    # Call ffmpeg and detect silence in file
    output = detect_silence(input_file, silence_threshold, silence_duration)

    start_time, end_time = 0.0, None

    # Chunks start when silence ends, and chunks end when silence starts.
    chunk_starts = []
    chunk_ends = []
    for line in output.splitlines():
        silence_start_match = SILENCE_START_RE.search(line)
        silence_end_match = SILENCE_END_RE.search(line)
        total_duration_match = TOTAL_DURATION_RE.search(line)
        if silence_start_match:
            chunk_ends.append(float(silence_start_match.group("start")))
            if not chunk_starts:
                # First event is a silence start: the clip opened with a
                # chunk (of zero length if the silence begins at 0).
                chunk_starts.append(start_time)
        elif silence_end_match:
            chunk_starts.append(float(silence_end_match.group("end")))
        elif total_duration_match:
            hours = int(total_duration_match.group("hours"))
            minutes = int(total_duration_match.group("minutes"))
            seconds = float(total_duration_match.group("seconds"))
            end_time = hours * 3600 + minutes * 60 + seconds

    if not chunk_starts:
        # No silence found: the whole clip is a single chunk.
        chunk_starts.append(start_time)

    if len(chunk_starts) > len(chunk_ends):
        # Finished with non-silence. Close the last chunk at the reported
        # total duration, or at a large placeholder when none was printed.
        chunk_ends.append(10000000.0 if end_time is None else end_time)

    chunks = list(zip(chunk_starts, chunk_ends))

    # The clip is silent only if no chunk actually spans any time. Counting
    # boundaries is not enough: a clip with no silence at all, or with audio
    # followed by silence that runs to the end, still contains audio.
    is_silence = not any(end > start for start, end in chunks)

    return chunks, is_silence


 def main(
    input_file="", silence_threshold=DEFAULT_THRESHOLD, silence_duration=DEFAULT_DURATION,
 ):
    """Log the detection settings, then log whether ``has_audio`` found audio.

    Takes the same three arguments as ``has_audio`` and returns ``None``.
    """
    logger.info(f"Audio settings: silence_threshold={silence_threshold}, silence_duration={silence_duration}")

    # Only the silence flag matters here; the chunk list is ignored.
    _chunks, all_silent = has_audio(input_file, silence_threshold, silence_duration)

    verdict = "No audio found..." if all_silent else "Audio found."
    logger.info(verdict)


 if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to main().
    from argparse import ArgumentParser

    parser = ArgumentParser(description="Check whether a media file contains any non-silent audio")
    parser.add_argument("input_file", help="Input filename (`-` for stdin)")
    parser.add_argument("--silence-threshold", default=DEFAULT_THRESHOLD, type=int, help="Silence threshold (in dB)")
    parser.add_argument("--silence-duration", default=DEFAULT_DURATION, type=float, help="Silence duration")
    parser.add_argument("-v", dest="verbose", action="store_true", help="Verbose mode")
    kwargs = vars(parser.parse_args())

    if kwargs.pop("verbose"):
        # logging.basicConfig() does nothing once the root logger has a
        # handler, and this module installs one at import time. Re-format the
        # existing handlers instead so the verbose format really applies.
        root_logger = logging.getLogger()
        root_logger.setLevel(logging.DEBUG)
        verbose_format = logging.Formatter("%(levelname)s: %(message)s")
        for handler in root_logger.handlers:
            handler.setFormatter(verbose_format)
        logger.setLevel(logging.DEBUG)

    main(**kwargs)
diff --git a/requirements.txt b/requirements.txt
 ffmpeg-python
	import logging
	import re
	import subprocess
	import sys

	import ffmpeg

	# Default logging: bare messages at INFO level.
	logging.basicConfig(level=logging.INFO, format="%(message)s")
	logger = logging.getLogger(__name__)
	logger.setLevel(logging.INFO)

	DEFAULT_DURATION = 0.3  # Units: seconds
	DEFAULT_THRESHOLD = -36  # Units: decibels

	# Compiled regexes for searching ffmpeg's log output. Raw strings keep the
	# backslash escapes literal instead of relying on Python passing unknown
	# string escapes through (which newer interpreters warn about).
	SILENCE_START_RE = re.compile(r" silence_start: (?P<start>[0-9]+(\.?[0-9]*))$")
	SILENCE_END_RE = re.compile(r" silence_end: (?P<end>[0-9]+(\.?[0-9]*)) ")
	TOTAL_DURATION_RE = re.compile(
	    r"size=[^ ]+ time=(?P<hours>[0-9]{2}):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9\.]{5}) bitrate="
	)


	def _logged_popen(cmd_line, *args, **kwargs):
	    """Log a command line at DEBUG level, then launch it with ``subprocess.Popen``.

	    ``cmd_line`` is the argument list; any further positional and keyword
	    arguments are forwarded to ``subprocess.Popen`` untouched (callers in
	    this file pass ``stderr=...`` by keyword, so ``**kwargs`` is required).
	    The started ``Popen`` object is returned.
	    """
	    logger.debug("Running command: {}".format(subprocess.list2cmdline(cmd_line)))
	    return subprocess.Popen(cmd_line, *args, **kwargs)


	def detect_silence(input_file, silence_threshold, silence_duration):
	    """Run ffmpeg's ``silencedetect`` filter over a file and return its log text.

	    Args:
	        input_file: Input handed to ``ffmpeg.input``.
	        silence_threshold: Threshold in dB, passed as the filter's ``n`` option.
	        silence_duration: Duration passed as the filter's ``d`` option.

	    Returns:
	        Everything ffmpeg wrote to stderr, decoded to ``str``.

	    If ffmpeg exits with a non-zero status, the output is logged as an error
	    and the process exits with status 1 (``SystemExit``).
	    """
	    cmd_line = (
	        ffmpeg.input(input_file)
	        .filter("silencedetect", n="{}dB".format(silence_threshold), d=silence_duration)
	        .output("-", format="null")
	        .compile()
	    ) + ["-nostats"]
	    p = _logged_popen(cmd_line, stderr=subprocess.PIPE)

	    # The log may carry bytes that are not valid UTF-8 (e.g. text copied from
	    # the media file); replace them rather than crash, since only the ASCII
	    # silencedetect lines are parsed downstream.
	    output = p.communicate()[1].decode("utf-8", errors="replace")
	    if p.returncode != 0:
	        logger.error(output)
	        sys.exit(1)

	    logger.debug(output)
	    return output


	def has_audio(
	    input_file="", silence_threshold=DEFAULT_THRESHOLD, silence_duration=DEFAULT_DURATION,
	):
	    """Find the non-silent stretches of a clip and report whether it is all silence.

	    Runs ``detect_silence`` and pairs up the silence boundaries found in its
	    output: a non-silent chunk starts where a silence ends and ends where the
	    next silence starts.

	    Args:
	        input_file: File to analyse.
	        silence_threshold: Silence threshold (dB) forwarded to ``detect_silence``.
	        silence_duration: Silence duration (seconds) forwarded to ``detect_silence``.

	    Returns:
	        A ``(chunks, is_silence)`` tuple. ``chunks`` is a list of
	        ``(start, end)`` times in seconds for the non-silent stretches.
	        ``is_silence`` is True when no chunk has a positive length. Note that
	        the flag is the inverse of what the function name suggests.
	    """
	    # Call ffmpeg and detect silence in file
	    output = detect_silence(input_file, silence_threshold, silence_duration)

	    start_time, end_time = 0.0, None

	    # Chunks start when silence ends, and chunks end when silence starts.
	    chunk_starts = []
	    chunk_ends = []
	    for line in output.splitlines():
	        silence_start_match = SILENCE_START_RE.search(line)
	        silence_end_match = SILENCE_END_RE.search(line)
	        total_duration_match = TOTAL_DURATION_RE.search(line)
	        if silence_start_match:
	            chunk_ends.append(float(silence_start_match.group("start")))
	            if not chunk_starts:
	                # First event is a silence start: the clip opened with a
	                # chunk (of zero length if the silence begins at 0).
	                chunk_starts.append(start_time)
	        elif silence_end_match:
	            chunk_starts.append(float(silence_end_match.group("end")))
	        elif total_duration_match:
	            hours = int(total_duration_match.group("hours"))
	            minutes = int(total_duration_match.group("minutes"))
	            seconds = float(total_duration_match.group("seconds"))
	            end_time = hours * 3600 + minutes * 60 + seconds

	    if not chunk_starts:
	        # No silence found: the whole clip is a single chunk.
	        chunk_starts.append(start_time)

	    if len(chunk_starts) > len(chunk_ends):
	        # Finished with non-silence. Close the last chunk at the reported
	        # total duration, or at a large placeholder when none was printed.
	        chunk_ends.append(10000000.0 if end_time is None else end_time)

	    chunks = list(zip(chunk_starts, chunk_ends))

	    # The clip is silent only if no chunk actually spans any time. Counting
	    # boundaries is not enough: a clip with no silence at all, or with audio
	    # followed by silence that runs to the end, still contains audio.
	    is_silence = not any(end > start for start, end in chunks)

	    return chunks, is_silence


	def main(
	    input_file="", silence_threshold=DEFAULT_THRESHOLD, silence_duration=DEFAULT_DURATION,
	):
	    """Log the detection settings, then log whether ``has_audio`` found audio.

	    Takes the same three arguments as ``has_audio`` and returns ``None``.
	    """
	    logger.info(f"Audio settings: silence_threshold={silence_threshold}, silence_duration={silence_duration}")

	    # Only the silence flag matters here; the chunk list is ignored.
	    _, is_silence = has_audio(input_file, silence_threshold, silence_duration)

	    if is_silence:
	        logger.info("No audio found...")
	        return

	    logger.info("Audio found.")


	if __name__ == "__main__":
	    # Command-line entry point: parse the options and hand them to main().
	    from argparse import ArgumentParser

	    parser = ArgumentParser(description="Check whether a media file contains any non-silent audio")
	    parser.add_argument("input_file", help="Input filename (`-` for stdin)")
	    parser.add_argument("--silence-threshold", default=DEFAULT_THRESHOLD, type=int, help="Silence threshold (in dB)")
	    parser.add_argument("--silence-duration", default=DEFAULT_DURATION, type=float, help="Silence duration")
	    parser.add_argument("-v", dest="verbose", action="store_true", help="Verbose mode")
	    kwargs = vars(parser.parse_args())

	    if kwargs.pop("verbose"):
	        # logging.basicConfig() does nothing once the root logger has a
	        # handler, and this module installs one at import time. Re-format the
	        # existing handlers instead so the verbose format really applies.
	        root_logger = logging.getLogger()
	        root_logger.setLevel(logging.DEBUG)
	        verbose_format = logging.Formatter("%(levelname)s: %(message)s")
	        for handler in root_logger.handlers:
	            handler.setFormatter(verbose_format)
	        logger.setLevel(logging.DEBUG)

	    main(**kwargs)