akitaonrails · November 5, 2024 02:24
diff --git a/build_ffmpeg_command.py b/build_ffmpeg_command.py
 # Procedure:
 # - download the episodes you want from the Android Netflix app (there is a limit)
 # - connect your smartphone to your PC and navigate to Internal Storage/Android/data/com.netflix.mediaclient/Download/.of.
 # - fetch each directory (each directory is one episode)
 #
 # Each directory should have the following file formats:
 # - .nfv - the video stream, possibly already in H.264, unencrypted
 # - .nfa - the most difficult file to deal with: an unencrypted AAC stream in ISO fMP4 (fragmented) format with an unknown "object type 42" that ffmpeg complains about
 # - .nfs - subtitle in TTML format with custom properties.
 # - .nfi - information about the download (unknown format)
 # - .manifest - tells which language each audio track and subtitle is in, which is useful to rebuild the video container
 #
 # What this script will try to do:
 # - convert the .nfa files into a universal .wav format using Gstreamer (ffmpeg complains) on Linux
 # - convert the .nfs files into srt and then into .ass format
 # - build an ffmpeg command line to assemble the video, audio and subtitle tracks
 #
 # Current status:
 # - the video file does play with Totem/Mplayer and VLC, but they can't skip ahead. There is some timecode or syncing issue
 # - still not sure if there is a need to remux the video track. feels like normal h.264. 
 # - just tested with one episode from one series. codecs may vary depending on when the show was first published


 import os
 import sys
 import xml.etree.ElementTree as ET
 import subprocess

 def parse_time(ttime):
    """Convert a TTML tick timestamp (e.g. ``"10000000t"``) to SRT time.

    The trailing ``t`` is dropped and the tick count is integer-divided by
    10000 to obtain milliseconds, which are then rendered as
    ``HH:MM:SS,mmm``.
    """
    total_ms = int(ttime.replace("t", "")) // 10000
    hh, remainder = divmod(total_ms, 3600000)
    mm, remainder = divmod(remainder, 60000)
    ss, millis = divmod(remainder, 1000)
    return f"{hh:02}:{mm:02}:{ss:02},{millis:03}"

 def ttml_to_srt(ttml_file, srt_file):
    """Convert a TTML subtitle file into an SRT file.

    Each ``p`` element in the TTML namespace that carries both ``begin`` and
    ``end`` attributes becomes one numbered SRT cue; the timestamps are
    formatted by ``parse_time``.  Newlines in the source text are flattened
    to spaces, while ``br`` elements become real line breaks inside the cue
    (joining the text on either side of a ``br`` directly would glue two
    words together).  Paragraphs without timing are skipped because they
    cannot be expressed in SRT.

    Args:
        ttml_file: Path of the TTML (.nfs) file to read.
        srt_file: Path of the SRT file to write (UTF-8).
    """
    ns = "{http://www.w3.org/ns/ttml}"
    br_tag = ns + "br"

    def text_pieces(node):
        # Mirrors Element.itertext(), except that a br yields a newline
        # marker and literal newlines in the source are flattened to spaces.
        if node.tag == br_tag:
            yield "\n"
        if node.text:
            yield node.text.replace("\n", " ")
        for child in node:
            yield from text_pieces(child)
            if child.tail:
                yield child.tail.replace("\n", " ")

    cues = []
    for p in ET.parse(ttml_file).getroot().iter(ns + "p"):
        begin, end = p.get("begin"), p.get("end")
        if begin is None or end is None:
            continue
        # An empty line would terminate the cue early in SRT, so drop them.
        lines = (line.strip() for line in "".join(text_pieces(p)).split("\n"))
        text = "\n".join(line for line in lines if line)
        cues.append(f"{len(cues) + 1}\n{parse_time(begin)} --> {parse_time(end)}\n{text}\n")

    with open(srt_file, "w", encoding="utf-8") as f:
        f.write("\n".join(cues))

 def find_files(episode_dir):
    """Recursively collect the Netflix download files below *episode_dir*.

    Returns a dict with the keys ``"manifest"`` and ``"video"`` (a single
    path or ``None``; the last match found wins) and ``"audio"`` and
    ``"subtitles"`` (lists of paths in the order ``os.walk`` yields them).
    """
    suffix_to_key = (
        (".manifest", "manifest"),
        (".nfv", "video"),
        (".nfa", "audio"),
        (".nfs", "subtitles"),
    )
    found = {"manifest": None, "video": None, "audio": [], "subtitles": []}

    for dirpath, _dirnames, names in os.walk(episode_dir):
        for name in names:
            for suffix, key in suffix_to_key:
                if not name.endswith(suffix):
                    continue
                path = os.path.join(dirpath, name)
                if isinstance(found[key], list):
                    found[key].append(path)
                else:
                    found[key] = path
                break

    return found

 def parse_manifest(manifest_file):
    """Return the movie id recorded in a ``.manifest`` file.

    The file is scanned as text for the first ``"movieId"`` key.  Both a
    quoted value (``"movieId":"123"``) and a bare one (``"movieId": 123``)
    are accepted.  When no path is given, the key is absent, or the value is
    empty, the fallback name ``"output"`` is returned instead of raising.

    Args:
        manifest_file: Path of the manifest file, or a falsy value if the
            episode directory had none.

    Returns:
        The movie id as a string, or ``"output"``.
    """
    import re  # local import: the file's import header does not provide ``re``

    if not manifest_file:
        return "output"

    with open(manifest_file, "r", encoding="utf-8") as f:
        content = f.read()

    # Group 1: quoted value (up to the next quote); group 2: bare token.
    match = re.search(r'"movieId"\s*:\s*(?:"([^"]*)|([^\s,}\]]+))', content)
    if match is None:
        return "output"

    value = match.group(1) or match.group(2)
    if value in (None, "", "null"):
        return "output"
    return value

 def convert_nfa_to_m4a(nfa_files, episode_dir):
    """Convert each ``.nfa`` audio file to an AAC ``.m4a`` file.

    For every input, GStreamer (``gst-launch-1.0``) decodes the file to an
    intermediate WAV in *episode_dir*, then ffmpeg encodes that WAV to a
    128k AAC ``.m4a`` with the same base name.  A file whose conversion
    fails at either step is reported on stdout and skipped.  The
    intermediate WAV is removed whether or not the conversion succeeded.

    Args:
        nfa_files: Iterable of paths to ``.nfa`` files.
        episode_dir: Directory that receives the ``.wav``/``.m4a`` files.

    Returns:
        List of paths of the ``.m4a`` files that were produced.
    """
    tool_errors = (OSError, ValueError, subprocess.SubprocessError)
    m4a_files = []

    for nfa_file in nfa_files:
        stem = os.path.splitext(os.path.basename(nfa_file))[0]
        wav_file = os.path.join(episode_dir, stem + ".wav")
        m4a_file = os.path.join(episode_dir, stem + ".m4a")

        try:
            # Step 1: Use GStreamer to extract audio to WAV
            try:
                result = subprocess.run(
                    [
                        "gst-launch-1.0", "filesrc", f"location={nfa_file}", "!",
                        "decodebin", "!", "audioconvert", "!", "audioresample", "!",
                        "wavenc", "!", "filesink", f"location={wav_file}"
                    ],
                    capture_output=True, text=True
                )
            except tool_errors as e:
                print(f"Exception during GStreamer nfa to wav conversion: {e}")
                continue
            print("GStreamer output (nfa to wav):", result.stdout)
            print("GStreamer error (nfa to wav):", result.stderr)
            if result.returncode != 0:
                print(f"Error converting {nfa_file} to .wav using GStreamer.")
                continue  # Skip to the next file if this one failed

            # Step 2: Convert WAV to M4A with FFmpeg.
            # "-y": output is captured, so ffmpeg's interactive "overwrite?"
            # prompt would be invisible and the script would appear to hang
            # when the .m4a already exists from an earlier run.
            try:
                result = subprocess.run(
                    ["ffmpeg", "-y", "-i", wav_file, "-c:a", "aac", "-b:a", "128k", "-movflags", "+faststart", m4a_file],
                    capture_output=True, text=True
                )
            except tool_errors as e:
                print(f"Exception during wav to m4a conversion: {e}")
                continue
            print("FFmpeg output (wav to m4a):", result.stdout)
            print("FFmpeg error (wav to m4a):", result.stderr)
            if result.returncode != 0:
                print(f"Error converting {wav_file} to .m4a.")
                continue  # Skip to the next file if this one failed
        finally:
            # Clean up the intermediate WAV file on success and on failure.
            if os.path.exists(wav_file):
                os.remove(wav_file)

        m4a_files.append(m4a_file)
    return m4a_files

 def convert_subtitles(subtitle_files, episode_dir):
    """Convert every subtitle file to SRT inside *episode_dir*.

    Each input is passed to ``ttml_to_srt``; the output keeps the input's
    base name with an ``.srt`` extension.  Returns the list of SRT paths in
    input order.
    """
    converted = []
    for source in subtitle_files:
        stem, _ext = os.path.splitext(os.path.basename(source))
        target = os.path.join(episode_dir, stem + ".srt")
        ttml_to_srt(source, target)
        converted.append(target)
    return converted

 def build_ffmpeg_command(video_file, audio_files, subtitle_files, output_file, subtitle_languages=None, audio_languages=None):
    """Build an ffmpeg command that muxes video, audio and subtitles to MKV.

    The video is re-encoded with libx264 (CRF 18, constant frame rate) and
    subtitles are stored as ASS.  All inputs are listed before any output
    option: ffmpeg applies an option to the next file on the command line,
    so encoder or metadata options placed between ``-i`` arguments would be
    attached to an input instead of the output.

    Args:
        video_file: Path of the video input (input index 0).
        audio_files: Paths of the audio inputs (input indexes 1..n).
        subtitle_files: Paths of the subtitle inputs (after the audio).
        output_file: Path of the Matroska file to write.
        subtitle_languages: Optional language tags, one per subtitle file;
            extra or missing entries are ignored.
        audio_languages: Optional language tags, one per audio file; extra
            or missing entries are ignored.

    Returns:
        The command as a list of arguments suitable for ``subprocess.run``.
    """
    audio_languages = audio_languages or []
    subtitle_languages = subtitle_languages or []
    audio_count = len(audio_files)
    subtitle_count = len(subtitle_files)

    ffmpeg_command = ["ffmpeg", "-fflags", "+genpts", "-i", video_file]

    # Inputs first: audio tracks, then subtitle tracks.
    for path in [*audio_files, *subtitle_files]:
        ffmpeg_command.extend(["-i", path])

    # Map streams (video, audio, subtitles)
    ffmpeg_command.extend(["-map", "0:v"])
    for index in range(1, audio_count + 1):
        ffmpeg_command.extend(["-map", f"{index}:a"])
    for index in range(audio_count + 1, audio_count + 1 + subtitle_count):
        ffmpeg_command.extend(["-map", f"{index}:s"])

    # Output codecs
    ffmpeg_command.extend(["-c:v", "libx264", "-crf", "18", "-vsync", "cfr", "-c:s", "ass"])

    # Per-stream language metadata (only for streams that have a tag)
    for i, lang in enumerate(audio_languages[:audio_count]):
        ffmpeg_command.extend([f"-metadata:s:a:{i}", f"language={lang}"])
    for i, lang in enumerate(subtitle_languages[:subtitle_count]):
        ffmpeg_command.extend([f"-metadata:s:s:{i}", f"language={lang}"])

    ffmpeg_command.extend(["-f", "matroska", output_file])
    return ffmpeg_command

 def build_ffmpeg_command2(video_file, audio_files, srt_files, output_file):
    """Build a stream-copy ffmpeg command that muxes everything into MKV.

    Each SRT file is first converted to an ``.ass`` file next to it by
    running ffmpeg, and the ``.ass`` file is used as the subtitle input.
    Video and audio are copied without re-encoding.  Language tags come
    from fixed lists (audio: eng, ja; subtitles: pt-BR, en) and are applied
    positionally to as many tracks as exist.

    Returns the command as a list of arguments.
    """
    audio_langs = ("eng", "ja")
    subtitle_langs = ("pt-BR", "en")
    audio_count = len(audio_files)
    subtitle_count = len(srt_files)

    # Inputs: video, then audio tracks, then converted subtitles.
    cmd = ["ffmpeg", "-fflags", "+genpts", "-i", video_file]
    for track in audio_files:
        cmd += ["-i", track]
    for srt_path in srt_files:
        ass_path = os.path.splitext(srt_path)[0] + ".ass"
        subprocess.run(["ffmpeg", "-i", srt_path, ass_path])
        cmd += ["-i", ass_path]

    # Stream mapping follows the input order above.
    cmd += ["-map", "0:v"]
    for n in range(1, audio_count + 1):
        cmd += ["-map", f"{n}:a"]
    for n in range(audio_count + 1, audio_count + 1 + subtitle_count):
        cmd += ["-map", f"{n}:s"]

    cmd += ["-c:v", "copy", "-c:a", "copy", "-c:s", "ass"]

    # Language tags, limited to the tracks that are actually present.
    for n, lang in zip(range(audio_count), audio_langs):
        cmd += [f"-metadata:s:a:{n}", f"language={lang}"]
    for n, lang in zip(range(subtitle_count), subtitle_langs):
        cmd += [f"-metadata:s:s:{n}", f"language={lang}"]

    # Timestamp handling flags, container format and destination.
    cmd += ["-avoid_negative_ts", "make_zero", "-f", "matroska", output_file]
    return cmd

 def main(episode_dir):
    """Assemble one downloaded episode directory into an MKV file.

    Locates the video, audio, subtitle and manifest files, converts audio
    to ``.m4a`` and subtitles to ``.srt``, then builds, prints and runs the
    ffmpeg muxing command.  The output file is named after the movie id
    from the manifest.  Nothing is done if the directory lacks a video, an
    audio or a subtitle file.
    """
    files = find_files(episode_dir)
    if not (files["video"] and files["audio"] and files["subtitles"]):
        print("Missing necessary files in the provided directory.")
        return

    movie_id = parse_manifest(files["manifest"])
    output_file = f"{movie_id}.mkv"

    # Convert .nfa files to .m4a
    m4a_files = convert_nfa_to_m4a(files["audio"], episode_dir)

    # Convert subtitles to SRT
    srt_files = convert_subtitles(files["subtitles"], episode_dir)

    # Build and print the FFmpeg command.  build_ffmpeg_command2 is the
    # builder whose signature matches these four arguments (and which
    # performs the SRT -> ASS step); calling build_ffmpeg_command here
    # raised TypeError because its language arguments were not supplied.
    ffmpeg_command = build_ffmpeg_command2(files["video"], m4a_files, srt_files, output_file)
    print("Generated FFmpeg command:")
    print(" ".join(ffmpeg_command))

    # Run the FFmpeg command
    subprocess.run(ffmpeg_command)

 if __name__ == "__main__":
    # Script entry point: the first command-line argument is the episode
    # directory; without it, only the usage line is printed.
    if len(sys.argv) >= 2:
        main(sys.argv[1])
    else:
        print("Usage: python build_ffmpeg_command.py /path/to/episode_folder")
	# Procedure:
	# - download the episodes you want from the Android Netflix app (there is a limit)
	# - connect your smartphone to your PC and navigate to Internal Storage/Android/data/com.netflix.mediaclient/Download/.of.
	# - fetch each directory (each directory is one episode)
	#
	# Each directory should have the following file formats:
	# - .nfv - the video stream, possibly already in H.264, unencrypted
	# - .nfa - the most difficult file to deal with: an unencrypted AAC stream in ISO fMP4 (fragmented) format with an unknown "object type 42" that ffmpeg complains about
	# - .nfs - subtitle in TTML format with custom properties.
	# - .nfi - information about the download (unknown format)
	# - .manifest - tells which language each audio track and subtitle is in, which is useful to rebuild the video container
	#
	# What this script will try to do:
	# - convert the .nfa files into a universal .wav format using Gstreamer (ffmpeg complains) on Linux
	# - convert the .nfs files into srt and then into .ass format
	# - build an ffmpeg command line to assemble the video, audio and subtitle tracks
	#
	# Current status:
	# - the video file does play with Totem/Mplayer and VLC, but they can't skip ahead. There is some timecode or syncing issue
	# - still not sure if there is a need to remux the video track. feels like normal h.264.
	# - just tested with one episode from one series. codecs may vary depending on when the show was first published


	import os
	import sys
	import xml.etree.ElementTree as ET
	import subprocess

	def parse_time(ttime):
	    """Convert a TTML tick timestamp (e.g. ``"10000000t"``) to SRT time.

	    The trailing ``t`` is dropped and the tick count is integer-divided by
	    10000 to obtain milliseconds, which are then rendered as
	    ``HH:MM:SS,mmm``.
	    """
	    total_ms = int(ttime.replace("t", "")) // 10000
	    hh, remainder = divmod(total_ms, 3600000)
	    mm, remainder = divmod(remainder, 60000)
	    ss, millis = divmod(remainder, 1000)
	    return f"{hh:02}:{mm:02}:{ss:02},{millis:03}"

	def ttml_to_srt(ttml_file, srt_file):
	    """Convert a TTML subtitle file into an SRT file.

	    Each ``p`` element in the TTML namespace that carries both ``begin`` and
	    ``end`` attributes becomes one numbered SRT cue; the timestamps are
	    formatted by ``parse_time``.  Newlines in the source text are flattened
	    to spaces, while ``br`` elements become real line breaks inside the cue
	    (joining the text on either side of a ``br`` directly would glue two
	    words together).  Paragraphs without timing are skipped because they
	    cannot be expressed in SRT.

	    Args:
	        ttml_file: Path of the TTML (.nfs) file to read.
	        srt_file: Path of the SRT file to write (UTF-8).
	    """
	    ns = "{http://www.w3.org/ns/ttml}"
	    br_tag = ns + "br"

	    def text_pieces(node):
	        # Mirrors Element.itertext(), except that a br yields a newline
	        # marker and literal newlines in the source are flattened to spaces.
	        if node.tag == br_tag:
	            yield "\n"
	        if node.text:
	            yield node.text.replace("\n", " ")
	        for child in node:
	            yield from text_pieces(child)
	            if child.tail:
	                yield child.tail.replace("\n", " ")

	    cues = []
	    for p in ET.parse(ttml_file).getroot().iter(ns + "p"):
	        begin, end = p.get("begin"), p.get("end")
	        if begin is None or end is None:
	            continue
	        # An empty line would terminate the cue early in SRT, so drop them.
	        lines = (line.strip() for line in "".join(text_pieces(p)).split("\n"))
	        text = "\n".join(line for line in lines if line)
	        cues.append(f"{len(cues) + 1}\n{parse_time(begin)} --> {parse_time(end)}\n{text}\n")

	    with open(srt_file, "w", encoding="utf-8") as f:
	        f.write("\n".join(cues))

	def find_files(episode_dir):
	    """Recursively collect the Netflix download files below *episode_dir*.

	    Returns a dict with the keys ``"manifest"`` and ``"video"`` (a single
	    path or ``None``; the last match found wins) and ``"audio"`` and
	    ``"subtitles"`` (lists of paths in the order ``os.walk`` yields them).
	    """
	    suffix_to_key = (
	        (".manifest", "manifest"),
	        (".nfv", "video"),
	        (".nfa", "audio"),
	        (".nfs", "subtitles"),
	    )
	    found = {"manifest": None, "video": None, "audio": [], "subtitles": []}

	    for dirpath, _dirnames, names in os.walk(episode_dir):
	        for name in names:
	            for suffix, key in suffix_to_key:
	                if not name.endswith(suffix):
	                    continue
	                path = os.path.join(dirpath, name)
	                if isinstance(found[key], list):
	                    found[key].append(path)
	                else:
	                    found[key] = path
	                break

	    return found

	def parse_manifest(manifest_file):
	    """Return the movie id recorded in a ``.manifest`` file.

	    The file is scanned as text for the first ``"movieId"`` key.  Both a
	    quoted value (``"movieId":"123"``) and a bare one (``"movieId": 123``)
	    are accepted.  When no path is given, the key is absent, or the value is
	    empty, the fallback name ``"output"`` is returned instead of raising.

	    Args:
	        manifest_file: Path of the manifest file, or a falsy value if the
	            episode directory had none.

	    Returns:
	        The movie id as a string, or ``"output"``.
	    """
	    import re  # local import: the file's import header does not provide ``re``

	    if not manifest_file:
	        return "output"

	    with open(manifest_file, "r", encoding="utf-8") as f:
	        content = f.read()

	    # Group 1: quoted value (up to the next quote); group 2: bare token.
	    match = re.search(r'"movieId"\s*:\s*(?:"([^"]*)|([^\s,}\]]+))', content)
	    if match is None:
	        return "output"

	    value = match.group(1) or match.group(2)
	    if value in (None, "", "null"):
	        return "output"
	    return value

	def convert_nfa_to_m4a(nfa_files, episode_dir):
	    """Convert each ``.nfa`` audio file to an AAC ``.m4a`` file.

	    For every input, GStreamer (``gst-launch-1.0``) decodes the file to an
	    intermediate WAV in *episode_dir*, then ffmpeg encodes that WAV to a
	    128k AAC ``.m4a`` with the same base name.  A file whose conversion
	    fails at either step is reported on stdout and skipped.  The
	    intermediate WAV is removed whether or not the conversion succeeded.

	    Args:
	        nfa_files: Iterable of paths to ``.nfa`` files.
	        episode_dir: Directory that receives the ``.wav``/``.m4a`` files.

	    Returns:
	        List of paths of the ``.m4a`` files that were produced.
	    """
	    tool_errors = (OSError, ValueError, subprocess.SubprocessError)
	    m4a_files = []

	    for nfa_file in nfa_files:
	        stem = os.path.splitext(os.path.basename(nfa_file))[0]
	        wav_file = os.path.join(episode_dir, stem + ".wav")
	        m4a_file = os.path.join(episode_dir, stem + ".m4a")

	        try:
	            # Step 1: Use GStreamer to extract audio to WAV
	            try:
	                result = subprocess.run(
	                    [
	                        "gst-launch-1.0", "filesrc", f"location={nfa_file}", "!",
	                        "decodebin", "!", "audioconvert", "!", "audioresample", "!",
	                        "wavenc", "!", "filesink", f"location={wav_file}"
	                    ],
	                    capture_output=True, text=True
	                )
	            except tool_errors as e:
	                print(f"Exception during GStreamer nfa to wav conversion: {e}")
	                continue
	            print("GStreamer output (nfa to wav):", result.stdout)
	            print("GStreamer error (nfa to wav):", result.stderr)
	            if result.returncode != 0:
	                print(f"Error converting {nfa_file} to .wav using GStreamer.")
	                continue  # Skip to the next file if this one failed

	            # Step 2: Convert WAV to M4A with FFmpeg.
	            # "-y": output is captured, so ffmpeg's interactive "overwrite?"
	            # prompt would be invisible and the script would appear to hang
	            # when the .m4a already exists from an earlier run.
	            try:
	                result = subprocess.run(
	                    ["ffmpeg", "-y", "-i", wav_file, "-c:a", "aac", "-b:a", "128k", "-movflags", "+faststart", m4a_file],
	                    capture_output=True, text=True
	                )
	            except tool_errors as e:
	                print(f"Exception during wav to m4a conversion: {e}")
	                continue
	            print("FFmpeg output (wav to m4a):", result.stdout)
	            print("FFmpeg error (wav to m4a):", result.stderr)
	            if result.returncode != 0:
	                print(f"Error converting {wav_file} to .m4a.")
	                continue  # Skip to the next file if this one failed
	        finally:
	            # Clean up the intermediate WAV file on success and on failure.
	            if os.path.exists(wav_file):
	                os.remove(wav_file)

	        m4a_files.append(m4a_file)
	    return m4a_files

	def convert_subtitles(subtitle_files, episode_dir):
	    """Convert every subtitle file to SRT inside *episode_dir*.

	    Each input is passed to ``ttml_to_srt``; the output keeps the input's
	    base name with an ``.srt`` extension.  Returns the list of SRT paths in
	    input order.
	    """
	    converted = []
	    for source in subtitle_files:
	        stem, _ext = os.path.splitext(os.path.basename(source))
	        target = os.path.join(episode_dir, stem + ".srt")
	        ttml_to_srt(source, target)
	        converted.append(target)
	    return converted

	def build_ffmpeg_command(video_file, audio_files, subtitle_files, output_file, subtitle_languages=None, audio_languages=None):
	    """Build an ffmpeg command that muxes video, audio and subtitles to MKV.

	    The video is re-encoded with libx264 (CRF 18, constant frame rate) and
	    subtitles are stored as ASS.  All inputs are listed before any output
	    option: ffmpeg applies an option to the next file on the command line,
	    so encoder or metadata options placed between ``-i`` arguments would be
	    attached to an input instead of the output.

	    Args:
	        video_file: Path of the video input (input index 0).
	        audio_files: Paths of the audio inputs (input indexes 1..n).
	        subtitle_files: Paths of the subtitle inputs (after the audio).
	        output_file: Path of the Matroska file to write.
	        subtitle_languages: Optional language tags, one per subtitle file;
	            extra or missing entries are ignored.
	        audio_languages: Optional language tags, one per audio file; extra
	            or missing entries are ignored.

	    Returns:
	        The command as a list of arguments suitable for ``subprocess.run``.
	    """
	    audio_languages = audio_languages or []
	    subtitle_languages = subtitle_languages or []
	    audio_count = len(audio_files)
	    subtitle_count = len(subtitle_files)

	    ffmpeg_command = ["ffmpeg", "-fflags", "+genpts", "-i", video_file]

	    # Inputs first: audio tracks, then subtitle tracks.
	    for path in [*audio_files, *subtitle_files]:
	        ffmpeg_command.extend(["-i", path])

	    # Map streams (video, audio, subtitles)
	    ffmpeg_command.extend(["-map", "0:v"])
	    for index in range(1, audio_count + 1):
	        ffmpeg_command.extend(["-map", f"{index}:a"])
	    for index in range(audio_count + 1, audio_count + 1 + subtitle_count):
	        ffmpeg_command.extend(["-map", f"{index}:s"])

	    # Output codecs
	    ffmpeg_command.extend(["-c:v", "libx264", "-crf", "18", "-vsync", "cfr", "-c:s", "ass"])

	    # Per-stream language metadata (only for streams that have a tag)
	    for i, lang in enumerate(audio_languages[:audio_count]):
	        ffmpeg_command.extend([f"-metadata:s:a:{i}", f"language={lang}"])
	    for i, lang in enumerate(subtitle_languages[:subtitle_count]):
	        ffmpeg_command.extend([f"-metadata:s:s:{i}", f"language={lang}"])

	    ffmpeg_command.extend(["-f", "matroska", output_file])
	    return ffmpeg_command

	def build_ffmpeg_command2(video_file, audio_files, srt_files, output_file):
	    """Build a stream-copy ffmpeg command that muxes everything into MKV.

	    Each SRT file is first converted to an ``.ass`` file next to it by
	    running ffmpeg, and the ``.ass`` file is used as the subtitle input.
	    Video and audio are copied without re-encoding.  Language tags come
	    from fixed lists (audio: eng, ja; subtitles: pt-BR, en) and are applied
	    positionally to as many tracks as exist.

	    Returns the command as a list of arguments.
	    """
	    audio_langs = ("eng", "ja")
	    subtitle_langs = ("pt-BR", "en")
	    audio_count = len(audio_files)
	    subtitle_count = len(srt_files)

	    # Inputs: video, then audio tracks, then converted subtitles.
	    cmd = ["ffmpeg", "-fflags", "+genpts", "-i", video_file]
	    for track in audio_files:
	        cmd += ["-i", track]
	    for srt_path in srt_files:
	        ass_path = os.path.splitext(srt_path)[0] + ".ass"
	        subprocess.run(["ffmpeg", "-i", srt_path, ass_path])
	        cmd += ["-i", ass_path]

	    # Stream mapping follows the input order above.
	    cmd += ["-map", "0:v"]
	    for n in range(1, audio_count + 1):
	        cmd += ["-map", f"{n}:a"]
	    for n in range(audio_count + 1, audio_count + 1 + subtitle_count):
	        cmd += ["-map", f"{n}:s"]

	    cmd += ["-c:v", "copy", "-c:a", "copy", "-c:s", "ass"]

	    # Language tags, limited to the tracks that are actually present.
	    for n, lang in zip(range(audio_count), audio_langs):
	        cmd += [f"-metadata:s:a:{n}", f"language={lang}"]
	    for n, lang in zip(range(subtitle_count), subtitle_langs):
	        cmd += [f"-metadata:s:s:{n}", f"language={lang}"]

	    # Timestamp handling flags, container format and destination.
	    cmd += ["-avoid_negative_ts", "make_zero", "-f", "matroska", output_file]
	    return cmd

	def main(episode_dir):
	    """Assemble one downloaded episode directory into an MKV file.

	    Locates the video, audio, subtitle and manifest files, converts audio
	    to ``.m4a`` and subtitles to ``.srt``, then builds, prints and runs the
	    ffmpeg muxing command.  The output file is named after the movie id
	    from the manifest.  Nothing is done if the directory lacks a video, an
	    audio or a subtitle file.
	    """
	    files = find_files(episode_dir)
	    if not (files["video"] and files["audio"] and files["subtitles"]):
	        print("Missing necessary files in the provided directory.")
	        return

	    movie_id = parse_manifest(files["manifest"])
	    output_file = f"{movie_id}.mkv"

	    # Convert .nfa files to .m4a
	    m4a_files = convert_nfa_to_m4a(files["audio"], episode_dir)

	    # Convert subtitles to SRT
	    srt_files = convert_subtitles(files["subtitles"], episode_dir)

	    # Build and print the FFmpeg command.  build_ffmpeg_command2 is the
	    # builder whose signature matches these four arguments (and which
	    # performs the SRT -> ASS step); calling build_ffmpeg_command here
	    # raised TypeError because its language arguments were not supplied.
	    ffmpeg_command = build_ffmpeg_command2(files["video"], m4a_files, srt_files, output_file)
	    print("Generated FFmpeg command:")
	    print(" ".join(ffmpeg_command))

	    # Run the FFmpeg command
	    subprocess.run(ffmpeg_command)

	if __name__ == "__main__":
	    # Script entry point: the first command-line argument is the episode
	    # directory; without it, only the usage line is printed.
	    if len(sys.argv) >= 2:
	        main(sys.argv[1])
	    else:
	        print("Usage: python build_ffmpeg_command.py /path/to/episode_folder")