Cyberes · June 22, 2023 03:04
diff --git a/sillytavern-chat-to-txt.py b/sillytavern-chat-to-txt.py
 #!/usr/bin/env python3
 import argparse
 import re
 from pathlib import Path
 import sys
 import json

 """
 Convert SillyTavern jsonl chats to TXT files.

 HOW TO USE:
 1. Find the chat file you want to convert. It's the `jsonl` file located in `SillyTavern/public/chats/<character name>/`
 2. Run this script with `python3 sillytavern-chat-to-txt.py <path to the jsonl file> <path to where you want to save the TXT file>`

 These things are stripped from the chat:
 - OOC chat like this: (OOC: bla bla)
 - Text between brackets: [bla bla bla]

 This script isn't going to produce a perfect transcript, but it's close enough.

 """

 # Matches text that must not appear in the transcript:
 #   1. OOC asides opened by "(" or "[", e.g. "(OOC: bla bla)".
 #   2. Whole lines that start with "[" and end with "]", e.g. "[bla bla bla]".
 # The second branch only *looks ahead* for the trailing newline instead of
 # consuming it, so removing a bracketed line does not glue the lines before
 # and after it together, and consecutive bracketed lines are all matched.
 cleaner_re = re.compile(r'(?:[(\[]OOC:.*?[\])])|(?:(?:^|\n)\[.*?\](?=\n|$))')

 def main():
    """Convert a SillyTavern ``jsonl`` chat log into a plain-text transcript.

    Reads two positional command-line arguments (the input ``jsonl`` path and
    the output TXT path), cleans each record's ``mes`` text with the
    module-level ``cleaner_re`` pattern, and writes every non-empty message as
    ``<name>:`` followed by its text and a blank line.

    Exits with status 1 when the input is not an existing file, when the
    output's parent directory is missing, or when a non-blank input line is
    not valid JSON.
    """
    parser = argparse.ArgumentParser(description='Convert SillyTavern jsonl files to TXT files for importing into the infinite context server.')
    parser.add_argument('filepath', help='The path to the jsonl file to parse')
    parser.add_argument('output_txt', help='The output TXT file to create.')
    args = parser.parse_args()

    # resolve() already returns an absolute path.
    input_jsonl = Path(args.filepath).expanduser().resolve()
    output_txt = Path(args.output_txt).expanduser().resolve()

    print('Converting chat:', input_jsonl)

    # is_file() also rejects a directory, which would otherwise only fail
    # later with a traceback when the path is read.
    if not input_jsonl.is_file():
        print('Input file does not exist:', input_jsonl)
        sys.exit(1)
    if not output_txt.parent.exists():
        print('Output parent directory does not exist:', output_txt.parent)
        sys.exit(1)

    chatlines = []
    # Decode explicitly as UTF-8 rather than relying on the platform's default
    # encoding, which would corrupt non-ASCII chat text on some systems.
    raw = input_jsonl.read_text(encoding='utf-8').splitlines()
    for line_no, line in enumerate(raw, start=1):
        if not line.strip():
            # Blank lines carry no record; skipping them avoids a spurious
            # decode error.
            continue
        try:
            chatlines.append(json.loads(line))
        except json.JSONDecodeError as err:
            print(f'JSON decode error on line {line_no}: {err}')
            sys.exit(1)

    formatted_chat = []
    for msg in chatlines:
        # Only JSON objects with a string `mes` value hold message text.
        if not isinstance(msg, dict):
            continue
        text = msg.get('mes')
        if not isinstance(text, str):
            continue
        clean_str = cleaner_re.sub('', text).strip().replace('\n\n', '\n').strip('\n')
        if not clean_str:
            continue
        # Fall back to a placeholder instead of crashing on a record that
        # has message text but no speaker name.
        formatted_chat.append({'name': msg.get('name', 'Unknown'), 'msg': clean_str})

    with open(output_txt, 'w', encoding='utf-8') as f:
        for msg in formatted_chat:
            f.write(f"{msg['name']}:\n{msg['msg']}\n\n")

    print(f'Converted {len(formatted_chat)} lines.')
    print('Saved to:', output_txt)

 # Run the conversion only when executed as a script, not when imported.
 if __name__ == "__main__":
    main()
	#!/usr/bin/env python3
	import argparse
	import re
	from pathlib import Path
	import sys
	import json

	"""
	Convert SillyTavern jsonl chats to TXT files.

	HOW TO USE:
	1. Find the chat file you want to convert. It's the `jsonl` file located in `SillyTavern/public/chats/<character name>/`
	2. Run this script with `python3 sillytavern-chat-to-txt.py <path to the jsonl file> <path to where you want to save the TXT file>`

	These things are stripped from the chat:
	- OOC chat like this: (OOC: bla bla)
	- Text between brackets: [bla bla bla]

	This script isn't going to produce a perfect transcript, but it's close enough.

	"""

	# Matches text that must not appear in the transcript:
	#   1. OOC asides opened by "(" or "[", e.g. "(OOC: bla bla)".
	#   2. Whole lines that start with "[" and end with "]", e.g. "[bla bla bla]".
	# Alternation uses a bare "|" (an escaped "\|" would match a literal pipe)
	# and the lazy ".*?" lets the bracketed text be of any length.
	# The second branch only *looks ahead* for the trailing newline instead of
	# consuming it, so removing a bracketed line does not glue the lines before
	# and after it together, and consecutive bracketed lines are all matched.
	cleaner_re = re.compile(r'(?:[(\[]OOC:.*?[\])])|(?:(?:^|\n)\[.*?\](?=\n|$))')

	def main():
	    """Convert a SillyTavern ``jsonl`` chat log into a plain-text transcript.

	    Reads two positional command-line arguments (the input ``jsonl`` path and
	    the output TXT path), cleans each record's ``mes`` text with the
	    module-level ``cleaner_re`` pattern, and writes every non-empty message as
	    ``<name>:`` followed by its text and a blank line.

	    Exits with status 1 when the input is not an existing file, when the
	    output's parent directory is missing, or when a non-blank input line is
	    not valid JSON.
	    """
	    parser = argparse.ArgumentParser(description='Convert SillyTavern jsonl files to TXT files for importing into the infinite context server.')
	    parser.add_argument('filepath', help='The path to the jsonl file to parse')
	    parser.add_argument('output_txt', help='The output TXT file to create.')
	    args = parser.parse_args()

	    # resolve() already returns an absolute path.
	    input_jsonl = Path(args.filepath).expanduser().resolve()
	    output_txt = Path(args.output_txt).expanduser().resolve()

	    print('Converting chat:', input_jsonl)

	    # is_file() also rejects a directory, which would otherwise only fail
	    # later with a traceback when the path is read.
	    if not input_jsonl.is_file():
	        print('Input file does not exist:', input_jsonl)
	        sys.exit(1)
	    if not output_txt.parent.exists():
	        print('Output parent directory does not exist:', output_txt.parent)
	        sys.exit(1)

	    chatlines = []
	    # Decode explicitly as UTF-8 rather than relying on the platform's default
	    # encoding, which would corrupt non-ASCII chat text on some systems.
	    raw = input_jsonl.read_text(encoding='utf-8').splitlines()
	    for line_no, line in enumerate(raw, start=1):
	        if not line.strip():
	            # Blank lines carry no record; skipping them avoids a spurious
	            # decode error.
	            continue
	        try:
	            chatlines.append(json.loads(line))
	        except json.JSONDecodeError as err:
	            print(f'JSON decode error on line {line_no}: {err}')
	            sys.exit(1)

	    formatted_chat = []
	    for msg in chatlines:
	        # Only JSON objects with a string `mes` value hold message text.
	        if not isinstance(msg, dict):
	            continue
	        text = msg.get('mes')
	        if not isinstance(text, str):
	            continue
	        clean_str = cleaner_re.sub('', text).strip().replace('\n\n', '\n').strip('\n')
	        if not clean_str:
	            continue
	        # Fall back to a placeholder instead of crashing on a record that
	        # has message text but no speaker name.
	        formatted_chat.append({'name': msg.get('name', 'Unknown'), 'msg': clean_str})

	    with open(output_txt, 'w', encoding='utf-8') as f:
	        for msg in formatted_chat:
	            f.write(f"{msg['name']}:\n{msg['msg']}\n\n")

	    print(f'Converted {len(formatted_chat)} lines.')
	    print('Saved to:', output_txt)

	# Run the conversion only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()