Skip to content

Instantly share code, notes, and snippets.

@pukkandan
Last active August 9, 2024 17:41
Show Gist options
  • Save pukkandan/ee737fec64822f2552caf3ca4cbf5db7 to your computer and use it in GitHub Desktop.
Save pukkandan/ee737fec64822f2552caf3ca4cbf5db7 to your computer and use it in GitHub Desktop.
Prettify and nest comments from yt-dlp's info.json file and write them to a new HTML/JSON file
#!/usr/bin/env python3
"""
SPDX-License-Identifier: MIT https://opensource.org/licenses/MIT
Copyright © 2021 [email protected]
* Input file is an info.json (with comments) that yt-dlp (https://github.com/yt-dlp/yt-dlp) wrote
* Change FIELDS according to your needs
The output file will be in the format:
[{
    'text': 'comment 1',
    ...
    'replies': [{
        'text': 'reply 1',
        ...
        'replies': [...],
    }, ...],
}, ...]
"""
import argparse
import json
import os.path
from datetime import datetime, timezone
def get_fields(dct):
    """Yield ``(name, value)`` pairs for one comment dict.

    Every extractor registered in ``FIELDS`` is called as
    ``extractor(dct, name)``; fields whose extractor returns ``None``
    are left out of the output.
    """
    for field, extract in FIELDS.items():
        value = extract(dct, field)
        if value is None:
            continue
        yield field, value
def filter_func(comments):
    """Return a list with one filtered dict per entry of *comments*.

    Each comment is reduced to the ``(name, value)`` pairs produced by
    ``get_fields``.
    """
    # map(get_fields, ...) yields one generator of pairs per comment;
    # dict() consumes each of them in turn.
    return list(map(dict, map(get_fields, comments)))
def _format_timestamp(dct, name):
    """Return ``dct[name]`` (a UNIX timestamp) as a ``YYYY/MM/DD`` UTC date.

    Returns ``None`` when the key is absent or holds ``None``. A timestamp
    of ``0`` is a valid date and is formatted like any other value.
    """
    timestamp = dct.get(name)
    if timestamp is None:
        return None
    # Timezone-aware replacement for the deprecated datetime.utcfromtimestamp
    return datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime('%Y/%m/%d')


def _nested_replies(dct, name):
    """Return the filtered replies stored under ``dct[name]``, or ``None`` if there are none."""
    # `or []` covers both a missing key and an explicit null value
    return filter_func(dct.get(name) or []) or None


# Maps each output field name to an extractor called as ``fn(dct, name)``.
# An extractor returning None causes the field to be omitted from the output.
FIELDS = {
    'text': dict.get,
    'author': dict.get,
    'timestamp': _format_timestamp,
    # Add more fields here
    'replies': _nested_replies,
}
# Command-line interface: both the input and the output path are mandatory.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--input-file', '-i', required=True, metavar='FILE', dest='inputfile',
    help='File to read video metadata from (info.json)',
)
parser.add_argument(
    '--output-file', '-o', required=True, metavar='FILE', dest='outputfile',
    help='File to write comments to (json / html)',
)
args = parser.parse_args()
# Output format is taken from the output file's extension (without the dot).
# It is lower-cased so that e.g. "out.JSON" / "out.HTML" are accepted too.
ext = os.path.splitext(args.outputfile)[1][1:].lower()
if ext == 'html':  # Error early
    # json2html is only needed for html output, so it is imported on demand
    try:
        from json2html import json2html
    except ImportError:
        raise SystemExit('ERROR: json2html is required for html output. Install it with pip install json2html')
elif ext != 'json':
    raise SystemExit(f'ERROR: Only json and html formats are supported, not {ext}')
print('Reading file')
with open(args.inputfile, encoding='utf-8') as f:
    info_dict = json.load(f)

comments = info_dict.get('comments')
del info_dict  # only the comments are needed from here on
if comments is None:
    # Fail with a readable message instead of a bare KeyError/TypeError
    raise SystemExit('ERROR: No comments found in the input file')

# Index the comments by id, in ascending timestamp order
# (comments without a timestamp sort first, as 0).
comment_data = {
    c['id']: c
    for c in sorted(comments, key=lambda c: c.get('timestamp') or 0)
}
del comments
# Count the indexed comments so that the progress total matches what is
# actually iterated, even if the input contained duplicate ids.
count = len(comment_data)
# Build the comment tree: every comment is appended either to the top-level
# list or to the 'replies' list of the comment named by its 'parent' id.
nested_comments = []
for i, c in enumerate(comment_data.values(), 1):
    print(f'Processing comment {i}/{count}', end='\r')
    parent_id = c.get('parent', 'root')
    if parent_id == 'root' or parent_id not in comment_data:
        # Replies whose parent is missing from the input are kept as
        # top-level comments instead of aborting the run with a KeyError.
        siblings = nested_comments
    else:
        siblings = comment_data[parent_id].setdefault('replies', [])
    siblings.append(c)
print('')
# Reduce every comment (and, recursively, its replies) to the FIELDS selection
nested_comments = filter_func(nested_comments)
# Serialise the comment tree in the format selected by the output extension
if ext == 'html':
    print('Converting to html')
    out = json2html.convert(nested_comments)
elif ext == 'json':
    print('Converting to json')
    out = json.dumps(nested_comments, ensure_ascii=False, indent=4)
del nested_comments  # only the serialised text is needed from here on

print('Writing file')
with open(args.outputfile, mode='w', encoding='utf-8') as f:
    f.write(out)
print('Done')
@tinyapps
Copy link

@jftuga, I have forked the script and added a few more modifications. Please let me know if you have any questions.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment