Created
November 5, 2024 02:24
-
-
Save akitaonrails/581f9b484cdb8577908c88c196161990 to your computer and use it in GitHub Desktop.
Try to convert Netflix videos into an open MKV format
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Procedure: | |
# - download the episodes you want from the Android Netflix app (there is a limit) | |
# - connect your smartphone to your PC and navigate to Internal Storage/Android/data/com.netflix.mediaclient/Download/.of. | |
# - fetch each directory (each directory is one episode) | |
# | |
# Each directory should have the following file formats: | |
# - .nfv - the video stream, possibly already in H.264, unencrypted | |
# - .nfa - the most difficult file to deal with: unencrypted AAC audio in ISO fMP4 (fragmented MP4) format, with an unknown "object type 42" that ffmpeg complains about
# - .nfs - subtitle in TTML format with custom properties. | |
# - .nfi - information about the download (unknown format) | |
# - .manifest - tells which languages the audio tracks and subtitles are in, which is useful for rebuilding the video container
# | |
# What this script will try to do: | |
# - convert the .nfa files into a universal .wav format using GStreamer on Linux (ffmpeg complains about them)
# - convert the .nfs files into srt and then into .ass format | |
# - build an ffmpeg command line to assemble the video, audio and subtitle tracks
# | |
# Current status: | |
# - the video file does play with Totem/Mplayer and VLC, but they can't skip ahead. There is some timecode or syncing issue | |
# - still not sure if there is a need to remux the video track. feels like normal h.264. | |
# - just tested with one episode from one series. codecs may vary depending on when the show was first published | |
import os | |
import sys | |
import xml.etree.ElementTree as ET | |
import subprocess | |
def parse_time(ttime, tick_rate=10_000_000):
    """Convert a tick-based timestamp into an SRT timestamp.

    Args:
        ttime: Tick count as a string carrying a ``t`` marker, e.g.
            ``"615432100t"``.
        tick_rate: Number of ticks per second. The default of 10,000,000
            reproduces the previously hard-coded ``ticks // 10000``
            conversion to milliseconds.

    Returns:
        The timestamp formatted as ``HH:MM:SS,mmm``.

    Raises:
        ValueError: If ``tick_rate`` is not positive or ``ttime`` does not
            contain an integer tick count.
    """
    if tick_rate <= 0:
        raise ValueError(f"tick_rate must be positive, got {tick_rate}")

    ticks = int(ttime.replace("t", ""))
    # Multiply before dividing so sub-millisecond tick rates keep precision.
    total_ms = ticks * 1000 // tick_rate

    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    seconds, ms = divmod(remainder, 1000)
    return f"{hours:02}:{minutes:02}:{seconds:02},{ms:03}"
def ttml_to_srt(ttml_file, srt_file):
    """Convert a TTML subtitle file into an SRT file.

    Every ``p`` element in the ``http://www.w3.org/ns/ttml`` namespace becomes
    one SRT cue. Its ``begin``/``end`` attributes are converted with
    ``parse_time``.

    Args:
        ttml_file: Path of the TTML (XML) file to read.
        srt_file: Path of the SRT file to write (UTF-8, overwritten).

    Raises:
        xml.etree.ElementTree.ParseError: If ``ttml_file`` is not valid XML.
        OSError: If either file cannot be opened.
    """
    ttml_ns = "{http://www.w3.org/ns/ttml}"
    br_tag = ttml_ns + "br"

    def collect_text(node, parts):
        """Append the text of *node* to *parts*, mapping <br/> to a newline."""
        # Newlines in the XML source are only formatting; flatten them to spaces.
        parts.append((node.text or "").replace("\n", " "))
        for child in node:
            if child.tag == br_tag:
                # An explicit line break must survive; itertext() would drop it
                # and glue the two lines together.
                parts.append("\n")
            else:
                collect_text(child, parts)
            parts.append((child.tail or "").replace("\n", " "))

    tree = ET.parse(ttml_file)
    root = tree.getroot()

    srt_output = []
    for p in root.iter(ttml_ns + "p"):
        begin = p.get("begin")
        end = p.get("end")
        if begin is None or end is None:
            # Without both timestamps no valid SRT cue can be written.
            continue

        parts = []
        collect_text(p, parts)
        # A blank line would terminate the cue early in SRT, so drop empties.
        lines = [line.strip() for line in "".join(parts).split("\n")]
        text = "\n".join(line for line in lines if line)

        # Number cues by what is actually emitted so the sequence has no gaps.
        index = len(srt_output) + 1
        srt_output.append(
            f"{index}\n{parse_time(begin)} --> {parse_time(end)}\n{text}\n"
        )

    with open(srt_file, "w", encoding="utf-8") as f:
        f.write("\n".join(srt_output))
def find_files(episode_dir):
    """Locate the downloaded stream files below an episode directory.

    The directory tree is walked recursively and files are classified by
    extension: ``.manifest``, ``.nfv`` (video), ``.nfa`` (audio) and ``.nfs``
    (subtitles). Other files are ignored.

    Args:
        episode_dir: Root directory of one downloaded episode.

    Returns:
        A dict with the keys ``"manifest"`` and ``"video"`` (a path or
        ``None``; the last match in traversal order wins) and ``"audio"`` and
        ``"subtitles"`` (lists of paths).
    """
    files = {
        "manifest": None,
        "video": None,
        "audio": [],
        "subtitles": [],
    }
    for root, dirnames, filenames in os.walk(episode_dir):
        # os.walk yields entries in arbitrary directory order. Sort so the
        # track order (and any positional language assignment made later) is
        # the same on every run and every filesystem.
        dirnames.sort()
        for filename in sorted(filenames):
            path = os.path.join(root, filename)
            if filename.endswith(".manifest"):
                files["manifest"] = path
            elif filename.endswith(".nfv"):
                files["video"] = path
            elif filename.endswith(".nfa"):
                files["audio"].append(path)
            elif filename.endswith(".nfs"):
                files["subtitles"].append(path)
    return files
def parse_manifest(manifest_file):
    """Extract the ``movieId`` value from a manifest file.

    Args:
        manifest_file: Path of the manifest file, or a falsy value when no
            manifest was found.

    Returns:
        The ``movieId`` value as a string, or ``"output"`` when there is no
        manifest, the key is absent, or its value is empty.

    Raises:
        OSError: If the manifest file cannot be read.
    """
    # Local import: the file's top-level import block does not include ``re``.
    import re

    if not manifest_file:
        return "output"

    with open(manifest_file, "r", encoding="utf-8") as f:
        content = f.read()

    # Accept a quoted value as well as a bare (e.g. numeric) one, with
    # optional whitespace around the colon. A plain split on '"movieId":"'
    # raises IndexError for any of those variations.
    match = re.search(r'"movieId"\s*:\s*(?:"([^"]*)"|([^\s",}\]]+))', content)
    if match is None:
        return "output"
    return match.group(1) or match.group(2) or "output"
def convert_nfa_to_m4a(nfa_files, episode_dir):
    """Convert ``.nfa`` audio files to ``.m4a`` via an intermediate WAV file.

    Each input is first decoded to WAV with ``gst-launch-1.0`` and then
    encoded to AAC with ``ffmpeg``. A file whose conversion fails at either
    step is reported on stdout and skipped.

    Args:
        nfa_files: Paths of the ``.nfa`` files to convert.
        episode_dir: Directory that receives the ``.wav``/``.m4a`` files.

    Returns:
        The paths of the ``.m4a`` files that were produced successfully.
    """
    m4a_files = []
    for nfa_file in nfa_files:
        stem = os.path.splitext(os.path.basename(nfa_file))[0]
        wav_file = os.path.join(episode_dir, stem + ".wav")
        m4a_file = os.path.join(episode_dir, stem + ".m4a")

        try:
            # Step 1: Use GStreamer to extract audio to WAV
            try:
                result = subprocess.run(
                    [
                        "gst-launch-1.0", "filesrc", f"location={nfa_file}", "!",
                        "decodebin", "!", "audioconvert", "!", "audioresample", "!",
                        "wavenc", "!", "filesink", f"location={wav_file}",
                    ],
                    capture_output=True, text=True,
                )
            except (OSError, ValueError) as e:
                # OSError: tool missing or not executable; ValueError covers
                # undecodable tool output (UnicodeDecodeError) with text=True.
                print(f"Exception during GStreamer nfa to wav conversion: {e}")
                continue
            print("GStreamer output (nfa to wav):", result.stdout)
            print("GStreamer error (nfa to wav):", result.stderr)
            if result.returncode != 0:
                print(f"Error converting {nfa_file} to .wav using GStreamer.")
                continue  # Skip to the next file if this one failed

            # Step 2: Convert WAV to M4A with FFmpeg
            try:
                result = subprocess.run(
                    # -y: output is captured, so ffmpeg's "overwrite?" prompt
                    # would be invisible and block forever on a re-run.
                    ["ffmpeg", "-y", "-i", wav_file, "-c:a", "aac", "-b:a", "128k",
                     "-movflags", "+faststart", m4a_file],
                    capture_output=True, text=True, stdin=subprocess.DEVNULL,
                )
            except (OSError, ValueError) as e:
                print(f"Exception during wav to m4a conversion: {e}")
                continue
            print("FFmpeg output (wav to m4a):", result.stdout)
            print("FFmpeg error (wav to m4a):", result.stderr)
            if result.returncode != 0:
                print(f"Error converting {wav_file} to .m4a.")
                continue  # Skip to the next file if this one failed

            m4a_files.append(m4a_file)
        finally:
            # Clean up the intermediate WAV file on failure paths too.
            if os.path.exists(wav_file):
                os.remove(wav_file)
    return m4a_files
def convert_subtitles(subtitle_files, episode_dir):
    """Convert each subtitle file to SRT inside ``episode_dir``.

    Args:
        subtitle_files: Paths of the subtitle files to convert; each one is
            handed to ``ttml_to_srt``.
        episode_dir: Directory that receives the ``.srt`` files.

    Returns:
        The paths of the written ``.srt`` files, in input order.
    """
    converted = []
    for source_path in subtitle_files:
        # Same base name as the source, with the extension swapped for .srt.
        stem, _ext = os.path.splitext(os.path.basename(source_path))
        target_path = os.path.join(episode_dir, stem + ".srt")
        ttml_to_srt(source_path, target_path)
        converted.append(target_path)
    return converted
def build_ffmpeg_command(video_file, audio_files, subtitle_files, output_file,
                         subtitle_languages=None, audio_languages=None):
    """Build an ffmpeg command that re-encodes the video and muxes all tracks.

    The video is re-encoded with libx264 (CRF 18, constant frame rate),
    subtitles are stored as ASS, and the result is written as Matroska.

    Args:
        video_file: Path of the video input.
        audio_files: Paths of the audio inputs, one per audio track.
        subtitle_files: Paths of the subtitle inputs, one per subtitle track.
        output_file: Path of the Matroska file to write.
        subtitle_languages: Optional language tags, matched to
            ``subtitle_files`` by position.
        audio_languages: Optional language tags, matched to ``audio_files``
            by position.

    Returns:
        The ffmpeg command as a list of arguments. Tracks without a matching
        language entry are tagged ``und``.
    """
    def language_for(languages, index):
        """Return the language at *index*, or 'und' when none is given."""
        if languages and index < len(languages):
            return languages[index] or "und"
        return "und"

    # ffmpeg applies each option to the next file named on the command line,
    # so every input must be listed before any codec or metadata option.
    # Otherwise those options are taken as settings for the following input.
    ffmpeg_command = ["ffmpeg", "-fflags", "+genpts", "-i", video_file]
    for audio_file in audio_files:
        ffmpeg_command.extend(["-i", audio_file])
    for subtitle_file in subtitle_files:
        ffmpeg_command.extend(["-i", subtitle_file])

    # Map streams (video, audio, subtitles). Input 0 is the video, audio
    # inputs follow, subtitle inputs come last.
    ffmpeg_command.extend(["-map", "0:v"])
    for i in range(len(audio_files)):
        ffmpeg_command.extend(["-map", f"{i + 1}:a"])
    first_subtitle_input = len(audio_files) + 1
    for i in range(len(subtitle_files)):
        ffmpeg_command.extend(["-map", f"{first_subtitle_input + i}:s"])

    # Output codecs.
    ffmpeg_command.extend(
        ["-c:v", "libx264", "-crf", "18", "-vsync", "cfr", "-c:s", "ass"]
    )

    # Per-stream language metadata on the output.
    for i in range(len(audio_files)):
        ffmpeg_command.extend(
            [f"-metadata:s:a:{i}", "language=" + language_for(audio_languages, i)]
        )
    for i in range(len(subtitle_files)):
        ffmpeg_command.extend(
            [f"-metadata:s:s:{i}", "language=" + language_for(subtitle_languages, i)]
        )

    ffmpeg_command.extend(["-f", "matroska", output_file])
    return ffmpeg_command
def build_ffmpeg_command2(video_file, audio_files, srt_files, output_file,
                          audio_languages=None, subtitle_languages=None):
    """Build an ffmpeg command that stream-copies video/audio and adds ASS subtitles.

    As a side effect, every SRT file is first converted to an ``.ass`` file
    next to it by running ffmpeg. A subtitle whose conversion fails is
    reported on stdout and left out of the command.

    Args:
        video_file: Path of the video input.
        audio_files: Paths of the audio inputs, one per audio track.
        srt_files: Paths of the SRT subtitle files.
        output_file: Path of the Matroska file to write.
        audio_languages: Language tags matched to ``audio_files`` by
            position. Defaults to ``["eng", "ja"]``.
        subtitle_languages: Language tags matched to ``srt_files`` by
            position. Defaults to ``["pt-BR", "en"]``.

    Returns:
        The ffmpeg command as a list of arguments. Tracks beyond the end of
        the corresponding language list get no language metadata.

    Raises:
        OSError: If ffmpeg cannot be started for the SRT-to-ASS conversion.
    """
    if audio_languages is None:
        audio_languages = ["eng", "ja"]
    if subtitle_languages is None:
        subtitle_languages = ["pt-BR", "en"]

    # Convert SRT to ASS up front. Each language stays paired with its file
    # so a skipped subtitle cannot shift the tags of the ones after it.
    ass_tracks = []
    for index, srt_file in enumerate(srt_files):
        ass_file = os.path.splitext(srt_file)[0] + ".ass"
        # -y: overwrite the .ass left behind by a previous run without prompting.
        result = subprocess.run(["ffmpeg", "-y", "-i", srt_file, ass_file])
        if result.returncode != 0:
            print(f"Error converting {srt_file} to .ass; skipping this subtitle track.")
            continue
        language = subtitle_languages[index] if index < len(subtitle_languages) else None
        ass_tracks.append((ass_file, language))

    # Inputs: video first, then audio, then subtitles.
    ffmpeg_command = ["ffmpeg", "-fflags", "+genpts", "-i", video_file]
    for audio_file in audio_files:
        ffmpeg_command.extend(["-i", audio_file])
    for ass_file, _language in ass_tracks:
        ffmpeg_command.extend(["-i", ass_file])

    # Map video, audio, and subtitles
    ffmpeg_command.extend(["-map", "0:v"])
    for i in range(len(audio_files)):
        ffmpeg_command.extend(["-map", f"{i + 1}:a"])
    for i in range(len(ass_tracks)):
        ffmpeg_command.extend(["-map", f"{i + len(audio_files) + 1}:s"])

    # Specify codecs and formats
    ffmpeg_command.extend(["-c:v", "copy", "-c:a", "copy", "-c:s", "ass"])

    # Add metadata for languages
    for i, lang in enumerate(audio_languages[:len(audio_files)]):
        ffmpeg_command.extend(["-metadata:s:a:" + str(i), "language=" + lang])
    for i, (_ass_file, lang) in enumerate(ass_tracks):
        if lang is not None:
            ffmpeg_command.extend(["-metadata:s:s:" + str(i), "language=" + lang])

    # Additional flags to handle timestamp and sync issues
    ffmpeg_command.extend(
        ["-avoid_negative_ts", "make_zero", "-f", "matroska", output_file]
    )
    return ffmpeg_command
def main(episode_dir):
    """Convert one downloaded episode directory into an MKV file.

    Finds the stream files, converts audio and subtitles, builds the ffmpeg
    muxing command, prints it, and runs it. The output file is named after
    the value returned by ``parse_manifest`` and written to the current
    working directory.

    Args:
        episode_dir: Directory holding the files of one episode.
    """
    files = find_files(episode_dir)
    if not (files["video"] and files["audio"] and files["subtitles"]):
        print("Missing necessary files in the provided directory.")
        return

    movie_id = parse_manifest(files["manifest"])
    output_file = f"{movie_id}.mkv"

    # Convert .nfa files to .m4a
    m4a_files = convert_nfa_to_m4a(files["audio"], episode_dir)
    if not m4a_files:
        print("Warning: no audio track could be converted; the output will have no audio.")

    # Convert subtitles to SRT
    srt_files = convert_subtitles(files["subtitles"], episode_dir)

    # Build and print the FFmpeg command. build_ffmpeg_command2 is used
    # because its four-parameter signature matches this call; passing four
    # arguments to build_ffmpeg_command, which declares six required
    # parameters, raises TypeError.
    ffmpeg_command = build_ffmpeg_command2(files["video"], m4a_files, srt_files, output_file)
    print("Generated FFmpeg command:")
    print(" ".join(ffmpeg_command))

    # Run the FFmpeg command and surface a failure instead of ignoring it.
    result = subprocess.run(ffmpeg_command)
    if result.returncode != 0:
        print(f"FFmpeg exited with status {result.returncode}.")
if __name__ == "__main__":
    # Script entry point: the first command-line argument is the episode
    # directory; without one, print the usage line instead.
    cli_args = sys.argv[1:]
    if cli_args:
        main(cli_args[0])
    else:
        print("Usage: python build_ffmpeg_command.py /path/to/episode_folder")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment