Equim-chan · March 17, 2024 11:16
diff --git a/README.adoc b/README.adoc
diff --git a/chat-archive.py b/chat-archive.py
 #!/usr/bin/env python

 import sys
 import time
 import logging
 import json
 from datetime import datetime, timedelta, timezone

 import pytchat
 from pytchat.processors.dummy_processor import DummyProcessor
 from pytchat.processors.html_archiver import HTMLArchiver

 logging.basicConfig(level=logging.INFO, format='%(asctime)s %(pathname)s:%(lineno)s:\t%(msg)s')

 def fetch(video_id, fallback_poll_interval=5):
    """Append the raw live-chat items of a video to ``<video_id>.json``.

    A pytchat stream is created with ``DummyProcessor`` and polled for as
    long as ``stream.is_alive()`` is true.  Every entry of a response's
    ``chatdata`` list is written as one compact JSON line (sorted keys,
    non-ASCII characters kept verbatim).  The file is opened in append
    mode, so earlier content is preserved.

    video_id: passed to ``pytchat.create`` and used as the output file stem.
    fallback_poll_interval: seconds to sleep when a poll result is unusable
        or the response carries no ``timeout`` value of its own.
    """
    stream = pytchat.create(video_id=video_id, processor=DummyProcessor())
    written = 0

    with open(video_id + '.json', 'a') as json_out:
        logging.info(f'appending to {json_out.name}')

        while stream.is_alive():
            responses = stream.get()

            # Anything other than a single response is skipped.
            if len(responses) != 1:
                logging.info(f'len(chats) != 1, sleep: {fallback_poll_interval}')
                time.sleep(fallback_poll_interval)
                continue

            response = responses[0]
            if not response:
                logging.info(f'chats[0] is empty, sleep: {fallback_poll_interval}')
                time.sleep(fallback_poll_interval)
                continue

            # Prefer the interval carried by the response itself.
            delay = response.get('timeout', fallback_poll_interval)
            actions = response.get('chatdata') or []
            json_out.writelines(
                json.dumps(action, ensure_ascii=False, sort_keys=True, separators=(',', ':')) + '\n'
                for action in actions
            )

            # Logged before the counter is bumped: "<so far> + <this batch>".
            logging.info(f'len: {written} + {len(actions)}, sleep: {delay}')
            written += len(actions)

            time.sleep(delay)

 def render(video_id, start_us_utc):
    """Render ``<video_id>.json`` into the HTML chat log ``<video_id>.html``.

    Each line of the JSON file is one chat action.  Only actions containing
    ``addChatItemAction`` are rendered.  Every item entry that carries a
    ``timestampUsec`` gets a ``timestampText`` holding the time elapsed
    since ``start_us_utc`` (prefixed with ``-`` when the message predates
    the start).  Actions are handed to ``HTMLArchiver.process`` in batches
    and the archiver is finalized at the end.

    video_id: file stem of both the input ``.json`` and output ``.html``.
    start_us_utc: reference start time in microseconds since the Unix epoch.

    Raises whatever ``open`` / ``json.loads`` raise for a missing file or a
    malformed (non-blank) line.
    """
    batch_size = 32
    start = datetime.fromtimestamp(start_us_utc / 1e6, timezone.utc)
    ar = HTMLArchiver(video_id + '.html')

    with open(video_id + '.json') as json_in:
        logging.info(f'reading from {json_in.name}')
        batch = []

        for line in json_in:
            # A blank line is not a chat action; skip it rather than let
            # json.loads abort the whole render.
            if not line.strip():
                continue
            chat = json.loads(line)
            if 'addChatItemAction' not in chat:
                continue

            # write elapsed time
            for renderer in chat['addChatItemAction']['item'].values():
                if not isinstance(renderer, dict) or not renderer.get('timestampUsec'):
                    continue
                timestamp_us = float(renderer['timestampUsec'])
                timestamp = datetime.fromtimestamp(timestamp_us / 1e6, timezone.utc)
                if timestamp >= start:
                    elapsed = str(timestamp - start)
                else:
                    # str() of a negative timedelta reads "-1 day, 23:59:..",
                    # so format the positive difference and add the sign.
                    elapsed = '-' + str(start - timestamp)
                renderer['timestampText'] = {'simpleText': elapsed}

            batch.append(chat)
            # Flush once the batch really holds batch_size items (the old
            # "(len + 1) % 32" test fired one item early).
            if len(batch) >= batch_size:
                ar.process([{'chatdata': batch}])
                batch = []

        if batch:
            ar.process([{'chatdata': batch}])
    ar.finalize()

 if __name__ == '__main__':
    # Command line:
    #   fetch  VIDEO_ID
    #   render VIDEO_ID START_US_UTC
    # Anything else (including missing arguments) prints the usage line to
    # stderr and exits with status 1 instead of raising IndexError.
    args = sys.argv[1:]
    if len(args) >= 2 and args[0] == 'fetch':
        fetch(args[1])
    elif len(args) >= 3 and args[0] == 'render':
        render(args[1], float(args[2]))
    else:
        sys.exit(f'usage: {sys.argv[0]} fetch VIDEO_ID | {sys.argv[0]} render VIDEO_ID START_US_UTC')
diff --git a/get_start_time.sh b/get_start_time.sh
 #!/usr/bin/env bash

 # Print a timestamp for the video whose id is $1 as microseconds since the
 # Unix epoch (UTC).  Going by the script name, this is used as the stream
 # start time.  cookies.txt is read from the working directory.
 #
 # Pipeline, stage by stage:
 #   1. youtube-dl -g       print the media URL(s) for the video
 #   2. head -n 1           keep only the first URL
 #   3. xargs curl -SsL     download what that URL points to
 #                          (presumably a playlist/manifest -- TODO confirm)
 #   4. tail -n 1           keep the last line of that response
 #   5. xargs curl -SsL -I  request only the headers for that last line's URL
 #   6. grep / sed          isolate the Last-Modified header value
 #   7. xargs ... date -d   convert it to epoch seconds followed by six
 #                          sub-second digits (%s%6N), in UTC
 youtube-dl --cookies cookies.txt -g "https://www.youtube.com/watch?v=$1" | \
  head -n 1 | \
  xargs curl -SsL | \
  tail -n 1 | \
  xargs curl -SsL -I | \
  grep -i 'last-modified' | \
  sed 's/last-modified: //i' | \
  xargs -d '\n' date +'%s%6N' --utc -d
diff --git a/slchk.py b/slchk.py
 #!/usr/bin/env python

 import re
 import sys

 segment_pat = re.compile(r'^.+ Segment')


 def parse_segments(lines):
    """Extract ``(segment_id, timestamp)`` pairs from log lines.

    Only lines containing ``Segment`` are considered.  The id is whatever
    integer remains after removing the word ``complete`` and everything up
    to and including `` Segment``.  The timestamp is the text between the
    first character of the line and the first ``]``.

    Returns the pairs sorted by segment id (ties keep file order).
    Raises ValueError if a matching line has no integer id or no ``]``.
    """
    segments = []
    for line in lines:
        if 'Segment' not in line:
            continue
        segment_id = int(segment_pat.sub('', line.replace('complete', '')).strip())
        timestamp = line[1:line.index(']')]
        segments.append((segment_id, timestamp))
    segments.sort(key=lambda x: x[0])
    return segments


 def find_gaps(segments):
    """Return the runs of missing ids in an id-sorted segment list.

    Each gap is ``(first_missing, last_missing, timestamp_before,
    timestamp_after)``.  A repeated id is not a gap: it only advances the
    "latest" timestamp, so duplicates no longer produce a bogus report or
    lower the missing count.
    """
    gaps = []
    if not segments:
        return gaps

    latest_id, latest_timestamp = segments[0]
    for segment_id, timestamp in segments[1:]:
        if segment_id > latest_id + 1:
            gaps.append((latest_id + 1, segment_id - 1, latest_timestamp, timestamp))
        latest_id = segment_id
        latest_timestamp = timestamp
    return gaps


 def main(log_file):
    """Report missing segments in ``log_file``; exit 1 if any are missing."""
    with open(log_file) as f:
        segments = parse_segments(f)

    # Nothing to check: fail with a message instead of an IndexError.
    if not segments:
        sys.exit(f'no Segment lines found in {log_file}')

    print(f'start: {segments[0][0]} ({segments[0][1]})')
    print(f'end: {segments[-1][0]} ({segments[-1][1]})')

    missing_count = 0
    for first, last, before, after in find_gaps(segments):
        if first != last:
            print(f'missing: {first}-{last} ({before} - {after})')
        else:
            print(f'missing: {first} ({before} - {after})')
        missing_count += last - first + 1

    print(f'missing segments count: {missing_count}')

    sys.exit(1 if missing_count > 0 else 0)


 if __name__ == '__main__':
    main(sys.argv[1])
diff --git a/streamlink-cookie.py b/streamlink-cookie.py
 #!/usr/bin/env python

 import sys
 from subprocess import Popen

 def cookie_args(lines):
    """Turn tab-separated cookie-file lines into streamlink options.

    For every usable line the last two tab-separated fields are taken as
    the cookie name and value and emitted as ``--http-cookie name=value``.
    Blank lines and lines starting with ``# `` are skipped.  Lines such as
    ``#HttpOnly_...`` do not start with ``# `` and are therefore kept, as in
    the original script.  Cookies whose name starts with ``ST-`` are
    skipped (rule kept from the original; its reason is not documented).

    Returns a flat list of command-line arguments.
    """
    args = []
    for raw in lines:
        # Drop only the line terminator: a cookie with an empty value ends
        # in a tab, and a full strip() would eat it and shift the fields.
        line = raw.rstrip('\r\n')
        if not line.strip() or line.lstrip().startswith('# '):
            continue

        fields = line.split('\t')
        # A line without a tab (e.g. a bare "#") carries no cookie.
        if len(fields) < 2:
            continue
        key, value = fields[-2], fields[-1].rstrip()
        if key.startswith('ST-'):
            continue

        args += ('--http-cookie', f'{key}={value}')
    return args


 if __name__ == '__main__':
    with open('cookies.txt') as f:
        cmd = ['streamlink', *cookie_args(f), *sys.argv[1:]]

    # Run streamlink on our own stdio and propagate its exit status.
    proc = Popen(cmd, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr)
    proc.wait()
    sys.exit(proc.returncode)
	#!/usr/bin/env python

	import sys
	import time
	import logging
	import json
	from datetime import datetime, timedelta, timezone

	import pytchat
	from pytchat.processors.dummy_processor import DummyProcessor
	from pytchat.processors.html_archiver import HTMLArchiver

	logging.basicConfig(level=logging.INFO, format='%(asctime)s %(pathname)s:%(lineno)s:\t%(msg)s')

	def fetch(video_id, fallback_poll_interval=5):
	    """Append the raw live-chat items of a video to ``<video_id>.json``.

	    A pytchat stream is created with ``DummyProcessor`` and polled for as
	    long as ``stream.is_alive()`` is true.  Every entry of a response's
	    ``chatdata`` list is written as one compact JSON line (sorted keys,
	    non-ASCII characters kept verbatim).  The file is opened in append
	    mode, so earlier content is preserved.

	    video_id: passed to ``pytchat.create`` and used as the output file stem.
	    fallback_poll_interval: seconds to sleep when a poll result is unusable
	        or the response carries no ``timeout`` value of its own.
	    """
	    stream = pytchat.create(video_id=video_id, processor=DummyProcessor())
	    total_len = 0

	    with open(video_id + '.json', 'a') as json_out:
	        logging.info(f'appending to {json_out.name}')

	        while stream.is_alive():
	            poll_interval = fallback_poll_interval

	            chats = stream.get()
	            # Anything other than a single response is skipped.
	            if len(chats) != 1:
	                logging.info(f'len(chats) != 1, sleep: {poll_interval}')
	                time.sleep(poll_interval)
	                continue

	            chat = chats[0]
	            if not chat:
	                logging.info(f'chats[0] is empty, sleep: {poll_interval}')
	                time.sleep(poll_interval)
	                continue

	            # Prefer the interval carried by the response itself.
	            poll_interval = chat.get('timeout', poll_interval)
	            chatdata = chat.get('chatdata') or []
	            for item in chatdata:
	                json_out.write(json.dumps(item, ensure_ascii=False, sort_keys=True, separators=(',', ':')) + '\n')

	            # Logged before the counter is bumped: "<so far> + <this batch>".
	            logging.info(f'len: {total_len} + {len(chatdata)}, sleep: {poll_interval}')
	            total_len += len(chatdata)

	            time.sleep(poll_interval)

	def render(video_id, start_us_utc):
	    """Render ``<video_id>.json`` into the HTML chat log ``<video_id>.html``.

	    Each line of the JSON file is one chat action.  Only actions containing
	    ``addChatItemAction`` are rendered.  Every item entry that carries a
	    ``timestampUsec`` gets a ``timestampText`` holding the time elapsed
	    since ``start_us_utc`` (prefixed with ``-`` when the message predates
	    the start).  Actions are handed to ``HTMLArchiver.process`` in batches
	    and the archiver is finalized at the end.

	    video_id: file stem of both the input ``.json`` and output ``.html``.
	    start_us_utc: reference start time in microseconds since the Unix epoch.

	    Raises whatever ``open`` / ``json.loads`` raise for a missing file or a
	    malformed (non-blank) line.
	    """
	    batch_size = 32
	    start = datetime.fromtimestamp(start_us_utc / 1e6, timezone.utc)
	    ar = HTMLArchiver(video_id + '.html')

	    with open(video_id + '.json') as json_in:
	        logging.info(f'reading from {json_in.name}')
	        batch = []

	        for line in json_in:
	            # A blank line is not a chat action; skip it rather than let
	            # json.loads abort the whole render.
	            if not line.strip():
	                continue
	            chat = json.loads(line)
	            if 'addChatItemAction' not in chat:
	                continue

	            # write elapsed time
	            for renderer in chat['addChatItemAction']['item'].values():
	                if not isinstance(renderer, dict) or not renderer.get('timestampUsec'):
	                    continue
	                timestamp_us = float(renderer['timestampUsec'])
	                timestamp = datetime.fromtimestamp(timestamp_us / 1e6, timezone.utc)
	                if timestamp >= start:
	                    elapsed = str(timestamp - start)
	                else:
	                    # str() of a negative timedelta reads "-1 day, 23:59:..",
	                    # so format the positive difference and add the sign.
	                    elapsed = '-' + str(start - timestamp)
	                renderer['timestampText'] = {'simpleText': elapsed}

	            batch.append(chat)
	            # Flush once the batch really holds batch_size items (the old
	            # "(len + 1) % 32" test fired one item early).
	            if len(batch) >= batch_size:
	                ar.process([{'chatdata': batch}])
	                batch = []

	        if batch:
	            ar.process([{'chatdata': batch}])
	    ar.finalize()

	if __name__ == '__main__':
	    # Command line:
	    #   fetch  VIDEO_ID
	    #   render VIDEO_ID START_US_UTC
	    # Anything else (including missing arguments) prints the usage line to
	    # stderr and exits with status 1 instead of raising IndexError.
	    args = sys.argv[1:]
	    if len(args) >= 2 and args[0] == 'fetch':
	        fetch(args[1])
	    elif len(args) >= 3 and args[0] == 'render':
	        render(args[1], float(args[2]))
	    else:
	        sys.exit(f'usage: {sys.argv[0]} fetch VIDEO_ID | {sys.argv[0]} render VIDEO_ID START_US_UTC')
	#!/usr/bin/env bash

	# Print a timestamp for the video whose id is $1 as microseconds since the
	# Unix epoch (UTC).  cookies.txt is read from the working directory.
	#
	# The stages are joined with real pipes; the escaped "\|" form would hand
	# a literal "|" to youtube-dl as an argument instead of piping.
	#
	# Pipeline, stage by stage:
	#   1. youtube-dl -g       print the media URL(s) for the video
	#   2. head -n 1           keep only the first URL
	#   3. xargs curl -SsL     download what that URL points to
	#                          (presumably a playlist/manifest -- TODO confirm)
	#   4. tail -n 1           keep the last line of that response
	#   5. xargs curl -SsL -I  request only the headers for that last line's URL
	#   6. grep / sed          isolate the Last-Modified header value
	#   7. xargs ... date -d   convert it to epoch seconds followed by six
	#                          sub-second digits (%s%6N), in UTC
	youtube-dl --cookies cookies.txt -g "https://www.youtube.com/watch?v=$1" | \
	  head -n 1 | \
	  xargs curl -SsL | \
	  tail -n 1 | \
	  xargs curl -SsL -I | \
	  grep -i 'last-modified' | \
	  sed 's/last-modified: //i' | \
	  xargs -d '\n' date +'%s%6N' --utc -d
	#!/usr/bin/env python

	import re
	import sys

	segment_pat = re.compile(r'^.+ Segment')


	def parse_segments(lines):
	    """Extract ``(segment_id, timestamp)`` pairs from log lines.

	    Only lines containing ``Segment`` are considered.  The id is whatever
	    integer remains after removing the word ``complete`` and everything up
	    to and including `` Segment``.  The timestamp is the text between the
	    first character of the line and the first ``]``.

	    Returns the pairs sorted by segment id (ties keep file order).
	    Raises ValueError if a matching line has no integer id or no ``]``.
	    """
	    segments = []
	    for line in lines:
	        if 'Segment' not in line:
	            continue
	        segment_id = int(segment_pat.sub('', line.replace('complete', '')).strip())
	        timestamp = line[1:line.index(']')]
	        segments.append((segment_id, timestamp))
	    segments.sort(key=lambda x: x[0])
	    return segments


	def find_gaps(segments):
	    """Return the runs of missing ids in an id-sorted segment list.

	    Each gap is ``(first_missing, last_missing, timestamp_before,
	    timestamp_after)``.  A repeated id is not a gap: it only advances the
	    "latest" timestamp, so duplicates no longer produce a bogus report or
	    lower the missing count.
	    """
	    gaps = []
	    if not segments:
	        return gaps

	    latest_id, latest_timestamp = segments[0]
	    for segment_id, timestamp in segments[1:]:
	        if segment_id > latest_id + 1:
	            gaps.append((latest_id + 1, segment_id - 1, latest_timestamp, timestamp))
	        latest_id = segment_id
	        latest_timestamp = timestamp
	    return gaps


	def main(log_file):
	    """Report missing segments in ``log_file``; exit 1 if any are missing."""
	    with open(log_file) as f:
	        segments = parse_segments(f)

	    # Nothing to check: fail with a message instead of an IndexError.
	    if not segments:
	        sys.exit(f'no Segment lines found in {log_file}')

	    print(f'start: {segments[0][0]} ({segments[0][1]})')
	    print(f'end: {segments[-1][0]} ({segments[-1][1]})')

	    missing_count = 0
	    for first, last, before, after in find_gaps(segments):
	        if first != last:
	            print(f'missing: {first}-{last} ({before} - {after})')
	        else:
	            print(f'missing: {first} ({before} - {after})')
	        missing_count += last - first + 1

	    print(f'missing segments count: {missing_count}')

	    sys.exit(1 if missing_count > 0 else 0)


	if __name__ == '__main__':
	    main(sys.argv[1])