Skip to content

Instantly share code, notes, and snippets.

@akitaonrails
Created November 5, 2024 02:24
Show Gist options
  • Save akitaonrails/581f9b484cdb8577908c88c196161990 to your computer and use it in GitHub Desktop.
Save akitaonrails/581f9b484cdb8577908c88c196161990 to your computer and use it in GitHub Desktop.
Try to convert Netflix videos into an open MKV format
# Procedure:
# - download the episodes you want from the Android Netflix app (there is a limit)
# - connect your smartphone to your PC and navigate to Internal Storage/Android/data/com.netflix.mediaclient/Download/.of.
# - fetch each directory (each directory is one episode)
#
# Each directory should have the following file formats:
# - .nfv - the video stream, possibly already in H.264, unencrypted
# - .nfa - the most difficult file to deal with: an unencrypted AAC stream in fragmented ISO MP4 (fMP4) format, using an unknown "object type 42" that ffmpeg complains about
# - .nfs - subtitle in TTML format with custom properties.
# - .nfi - information about the download (unknown format)
# - .manifest - it can tell what language are the audio tracks and subtitles, which is useful to re-build the video envelope
#
# What this script will try to do:
# - convert the .nfa files into a universal .wav format using GStreamer on Linux (ffmpeg complains about them), then re-encode the .wav into .m4a (AAC) with ffmpeg
# - convert the .nfs files into .srt and then into .ass format
# - build an ffmpeg command line to assemble the video, audio and subtitle tracks
#
# Current status:
# - the video file does play with Totem/Mplayer and VLC, but they can't skip ahead. There is some timecode or syncing issue
# - still not sure if there is a need to remux the video track. feels like normal h.264.
# - just tested with one episode from one series. codecs may vary depending on when the show was first published
import os
import re
import subprocess
import sys
import xml.etree.ElementTree as ET
def parse_time(ttime, tick_rate=10_000_000):
    """Convert a TTML tick timestamp such as ``"12345678t"`` to SRT time.

    Args:
        ttime: Tick-count string taken from a ``begin``/``end`` attribute.
            Every ``"t"`` character is removed before the integer conversion.
        tick_rate: Number of ticks per second (an integer). The default of
            10,000,000 reproduces the previously hard-coded divisor of
            10,000 ticks per millisecond.

    Returns:
        The timestamp formatted as ``HH:MM:SS,mmm`` (sub-millisecond ticks
        are truncated).

    Raises:
        ValueError: If ``tick_rate`` is not positive or the remaining text is
            not an integer.
    """
    if tick_rate <= 0:
        raise ValueError(f"tick_rate must be positive, got {tick_rate!r}")

    ticks = int(ttime.replace("t", ""))
    # Multiply before dividing so the result is exact integer arithmetic for
    # any tick rate; with the default rate this equals ``ticks // 10000``.
    total_ms = ticks * 1000 // tick_rate

    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    seconds, ms = divmod(remainder, 1000)
    return f"{hours:02}:{minutes:02}:{seconds:02},{ms:03}"
def ttml_to_srt(ttml_file, srt_file):
    """Convert a TTML subtitle file into an SRT file.

    Every ``<p>`` element in the TTML namespace becomes one numbered SRT cue.
    ``<br/>`` elements become line breaks inside the cue; literal newlines in
    the XML text are treated as plain whitespace.

    Args:
        ttml_file: Path of the TTML (.nfs) file to read.
        srt_file: Path of the SRT file to write (UTF-8, overwritten).

    Paragraphs lacking a ``begin`` or ``end`` attribute cannot be timed and
    are skipped; cue numbers count only the cues actually written.
    """
    ttml_ns = "{http://www.w3.org/ns/ttml}"
    br_tag = ttml_ns + "br"

    def cue_text(paragraph):
        """Return the paragraph's text with <br/> mapped to newlines."""
        fragments = []

        def collect(element):
            if element.tag == br_tag:
                fragments.append("\n")
            elif element.text:
                # Newlines in the source are formatting, not line breaks.
                fragments.append(element.text.replace("\n", " "))
            for child in element:
                collect(child)
                if child.tail:
                    fragments.append(child.tail.replace("\n", " "))

        collect(paragraph)
        stripped = (line.strip() for line in "".join(fragments).split("\n"))
        # An empty line inside a cue would terminate it early in SRT.
        return "\n".join(line for line in stripped if line)

    root = ET.parse(ttml_file).getroot()
    cues = []
    for p in root.iter(ttml_ns + "p"):
        begin = p.get("begin")
        end = p.get("end")
        if begin is None or end is None:
            continue
        number = len(cues) + 1
        cues.append(
            f"{number}\n{parse_time(begin)} --> {parse_time(end)}\n{cue_text(p)}\n"
        )

    with open(srt_file, "w", encoding="utf-8") as f:
        f.write("\n".join(cues))
def find_files(episode_dir):
    """Locate the Netflix download files below ``episode_dir``.

    Args:
        episode_dir: Directory that is walked recursively.

    Returns:
        A dict with the keys ``"manifest"`` and ``"video"`` (a path or
        ``None``; the last match in traversal order wins) and ``"audio"`` and
        ``"subtitles"`` (lists of paths for ``.nfa`` and ``.nfs`` files).

    Directories and file names are visited in sorted order so the track lists
    are reproducible between runs; later steps assign languages by position.
    """
    files = {
        "manifest": None,
        "video": None,
        "audio": [],
        "subtitles": [],
    }
    for root, dirnames, filenames in os.walk(episode_dir):
        # Sorting in place makes os.walk descend in a deterministic order.
        dirnames.sort()
        for filename in sorted(filenames):
            path = os.path.join(root, filename)
            if filename.endswith(".manifest"):
                files["manifest"] = path
            elif filename.endswith(".nfv"):
                files["video"] = path
            elif filename.endswith(".nfa"):
                files["audio"].append(path)
            elif filename.endswith(".nfs"):
                files["subtitles"].append(path)
    return files
def parse_manifest(manifest_file):
    """Extract the ``movieId`` value from a manifest file.

    Args:
        manifest_file: Path of the manifest, or a falsy value when no
            manifest was found.

    Returns:
        The ``movieId`` value as a string, whether it is stored quoted or as
        a bare number. Returns ``"output"`` when there is no manifest, no
        ``movieId`` key, or the value is empty.
    """
    if not manifest_file:
        return "output"

    with open(manifest_file, "r", encoding="utf-8") as f:
        content = f.read()

    # Tolerate optional whitespace and an unquoted value instead of indexing
    # into a split() result, which raised IndexError when the key was missing.
    match = re.search(r'"movieId"\s*:\s*"?([^",}\s]+)', content)
    return match.group(1) if match else "output"
def convert_nfa_to_m4a(nfa_files, episode_dir):
    """Convert each .nfa audio file to .m4a through an intermediate .wav.

    GStreamer (``gst-launch-1.0``) decodes the .nfa file to WAV, then FFmpeg
    encodes the WAV to AAC at 128k. Both tools' output is printed.

    Args:
        nfa_files: Paths of the .nfa files to convert.
        episode_dir: Directory that receives the .wav and .m4a files.

    Returns:
        Paths of the .m4a files that were produced. A file whose conversion
        fails at either step is reported and left out of the list.
    """
    m4a_files = []
    for nfa_file in nfa_files:
        stem = os.path.splitext(os.path.basename(nfa_file))[0]
        wav_file = os.path.join(episode_dir, stem + ".wav")
        m4a_file = os.path.join(episode_dir, stem + ".m4a")
        try:
            # Step 1: Use GStreamer to extract audio to WAV
            try:
                result = subprocess.run(
                    [
                        "gst-launch-1.0", "filesrc", f"location={nfa_file}", "!",
                        "decodebin", "!", "audioconvert", "!", "audioresample", "!",
                        "wavenc", "!", "filesink", f"location={wav_file}",
                    ],
                    capture_output=True, text=True,
                )
            except Exception as e:
                # Deliberately broad: one bad file (missing tool, undecodable
                # tool output, ...) must not abort the remaining conversions.
                print(f"Exception during GStreamer nfa to wav conversion: {e}")
                continue
            print("GStreamer output (nfa to wav):", result.stdout)
            print("GStreamer error (nfa to wav):", result.stderr)
            if result.returncode != 0:
                print(f"Error converting {nfa_file} to .wav using GStreamer.")
                continue  # Skip to the next file if this one failed

            # Step 2: Convert WAV to M4A with FFmpeg
            try:
                result = subprocess.run(
                    # -y: overwrite a stale .m4a from an earlier run. Without
                    # it ffmpeg waits on an overwrite prompt that is invisible
                    # here because its output is captured.
                    ["ffmpeg", "-y", "-i", wav_file, "-c:a", "aac", "-b:a", "128k",
                     "-movflags", "+faststart", m4a_file],
                    capture_output=True, text=True,
                )
            except Exception as e:
                print(f"Exception during wav to m4a conversion: {e}")
                continue
            print("FFmpeg output (wav to m4a):", result.stdout)
            print("FFmpeg error (wav to m4a):", result.stderr)
            if result.returncode != 0:
                print(f"Error converting {wav_file} to .m4a.")
                continue  # Skip to the next file if this one failed

            m4a_files.append(m4a_file)
        finally:
            # Clean up the intermediate WAV on every path, including failures.
            if os.path.exists(wav_file):
                os.remove(wav_file)
    return m4a_files
def convert_subtitles(subtitle_files, episode_dir):
    """Convert every TTML subtitle file to SRT inside ``episode_dir``.

    Args:
        subtitle_files: Paths of the subtitle files to convert.
        episode_dir: Directory that receives the generated .srt files.

    Returns:
        The paths of the written .srt files, in input order. Each output
        keeps the source file's base name with the extension replaced.
    """
    converted = []
    for source in subtitle_files:
        stem, _extension = os.path.splitext(os.path.basename(source))
        target = os.path.join(episode_dir, stem + ".srt")
        ttml_to_srt(source, target)
        converted.append(target)
    return converted
def build_ffmpeg_command(video_file, audio_files, subtitle_files, output_file,
                         subtitle_languages=None, audio_languages=None):
    """Build an ffmpeg command that muxes the tracks into a Matroska file.

    The video is re-encoded with libx264 (CRF 18, constant frame rate) and
    subtitles are stored as ASS.

    Args:
        video_file: Path of the video input (input 0).
        audio_files: Paths of the audio inputs (inputs 1..N).
        subtitle_files: Paths of the subtitle inputs (inputs N+1..).
        output_file: Path of the output file.
        subtitle_languages: Optional language tags, matched to
            ``subtitle_files`` by position. Tracks without a tag get no
            language metadata.
        audio_languages: Optional language tags, matched to ``audio_files``
            by position. Tracks without a tag get no language metadata.

    Returns:
        The command as a list of arguments suitable for ``subprocess.run``.
    """
    subtitle_languages = subtitle_languages or []
    audio_languages = audio_languages or []

    # ffmpeg applies each option to the next file named on the command line,
    # so every input must be listed before any output option appears.
    ffmpeg_command = ["ffmpeg", "-fflags", "+genpts", "-i", video_file]
    for audio_file in audio_files:
        ffmpeg_command.extend(["-i", audio_file])
    for subtitle_file in subtitle_files:
        ffmpeg_command.extend(["-i", subtitle_file])

    # Map streams (video, audio, subtitles)
    ffmpeg_command.extend(["-map", "0:v"])
    for i in range(len(audio_files)):
        ffmpeg_command.extend(["-map", f"{i + 1}:a"])
    for i in range(len(subtitle_files)):
        ffmpeg_command.extend(["-map", f"{len(audio_files) + 1 + i}:s"])

    # Output options: video encoding settings
    ffmpeg_command.extend(["-c:v", "libx264", "-crf", "18", "-vsync", "cfr"])

    # Language metadata; zip() stops at the shorter sequence, so a short
    # language list no longer raises IndexError.
    for i, (_, language) in enumerate(zip(audio_files, audio_languages)):
        ffmpeg_command.extend([f"-metadata:s:a:{i}", "language=" + language])
    for i, (_, language) in enumerate(zip(subtitle_files, subtitle_languages)):
        ffmpeg_command.extend([f"-metadata:s:s:{i}", "language=" + language])

    # Set codec for subtitles
    ffmpeg_command.extend(["-c:s", "ass", "-f", "matroska", output_file])
    return ffmpeg_command
def build_ffmpeg_command2(video_file, audio_files, srt_files, output_file,
                          audio_languages=None, subtitle_languages=None):
    """Convert SRT subtitles to ASS and build a stream-copy ffmpeg command.

    Each SRT file is converted to an .ass file next to it by running ffmpeg
    immediately (a side effect of calling this function). The returned
    command copies the video and audio streams unchanged into a Matroska
    file and stores the subtitles as ASS.

    Args:
        video_file: Path of the video input (input 0).
        audio_files: Paths of the audio inputs.
        srt_files: Paths of the SRT subtitle files to convert and include.
        output_file: Path of the output file.
        audio_languages: Language tags matched to ``audio_files`` by
            position. Defaults to ``["eng", "ja"]``.
        subtitle_languages: Language tags matched to ``srt_files`` by
            position. Defaults to ``["pt-BR", "en"]``.

    Returns:
        The command as a list of arguments suitable for ``subprocess.run``.
        A subtitle whose conversion fails is reported and left out.
    """
    if audio_languages is None:
        audio_languages = ["eng", "ja"]
    if subtitle_languages is None:
        subtitle_languages = ["pt-BR", "en"]

    # Start building the ffmpeg command with input files
    ffmpeg_command = ["ffmpeg", "-fflags", "+genpts", "-i", video_file]
    for audio_file in audio_files:
        ffmpeg_command.extend(["-i", audio_file])

    # Convert SRT to ASS; remember each surviving track with the language of
    # its original position so a skipped file does not shift the labels.
    subtitle_tracks = []
    for index, srt_file in enumerate(srt_files):
        ass_file = os.path.splitext(srt_file)[0] + ".ass"
        # -y: regenerate a stale .ass from an earlier run without prompting.
        result = subprocess.run(["ffmpeg", "-y", "-i", srt_file, ass_file])
        if result.returncode != 0:
            print(f"Error converting {srt_file} to .ass; skipping this subtitle.")
            continue
        language = subtitle_languages[index] if index < len(subtitle_languages) else None
        subtitle_tracks.append((ass_file, language))
        ffmpeg_command.extend(["-i", ass_file])

    # Map video, audio, and subtitles
    ffmpeg_command.extend(["-map", "0:v"])
    for index in range(len(audio_files)):
        ffmpeg_command.extend(["-map", f"{index + 1}:a"])
    for index in range(len(subtitle_tracks)):
        ffmpeg_command.extend(["-map", f"{index + len(audio_files) + 1}:s"])

    # Specify codecs and formats
    ffmpeg_command.extend(["-c:v", "copy", "-c:a", "copy", "-c:s", "ass"])

    # Add metadata for languages
    for index, language in enumerate(audio_languages[:len(audio_files)]):
        ffmpeg_command.extend(["-metadata:s:a:" + str(index), "language=" + language])
    for index, (_, language) in enumerate(subtitle_tracks):
        if language is not None:
            ffmpeg_command.extend(["-metadata:s:s:" + str(index), "language=" + language])

    # Additional flags to handle timestamp and sync issues
    ffmpeg_command.extend(["-avoid_negative_ts", "make_zero", "-f", "matroska", output_file])
    return ffmpeg_command
def main(episode_dir):
    """Assemble one downloaded episode directory into ``<movieId>.mkv``.

    Finds the video, audio and subtitle files, converts audio to .m4a and
    subtitles to .srt, then builds, prints and runs the ffmpeg mux command.
    The output file is written to the current working directory.

    Args:
        episode_dir: Directory holding the files of a single episode.
    """
    files = find_files(episode_dir)
    if not files["video"] or not files["audio"] or not files["subtitles"]:
        print("Missing necessary files in the provided directory.")
        return

    movie_id = parse_manifest(files["manifest"])
    output_file = f"{movie_id}.mkv"

    # Convert .nfa files to .m4a
    m4a_files = convert_nfa_to_m4a(files["audio"], episode_dir)
    if not m4a_files:
        # Every conversion failed; muxing would silently drop all audio.
        print("No audio track could be converted; aborting.")
        return

    # Convert subtitles to SRT
    srt_files = convert_subtitles(files["subtitles"], episode_dir)

    # Build and print the FFmpeg command. build_ffmpeg_command2 accepts
    # exactly these four arguments; calling build_ffmpeg_command this way
    # raised TypeError because of its two required language parameters.
    ffmpeg_command = build_ffmpeg_command2(files["video"], m4a_files, srt_files, output_file)
    print("Generated FFmpeg command:")
    print(" ".join(ffmpeg_command))

    # Run the FFmpeg command and report a failed mux
    result = subprocess.run(ffmpeg_command)
    if result.returncode != 0:
        print(f"FFmpeg exited with status {result.returncode}; {output_file} may be incomplete.")
# Script entry point: expects the episode directory as the first argument.
if __name__ == "__main__":
    if len(sys.argv) >= 2:
        main(sys.argv[1])
    else:
        # No directory given: show how to invoke the script.
        print("Usage: python build_ffmpeg_command.py /path/to/episode_folder")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment