Last active
June 22, 2023 03:04
-
-
Save Cyberes/f276da3f5f49e981244a8330d74f940d to your computer and use it in GitHub Desktop.
Convert SillyTavern jsonl chats to TXT files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import argparse | |
import re | |
from pathlib import Path | |
import sys | |
import json | |
""" | |
Convert SillyTavern jsonl chats to TXT files. | |
HOW TO USE: | |
1. Find the chat file you want to convert. It's the `jsonl` file located in `SillyTavern/public/chats/<character name>/`` | |
2. Run this script with `python3 sillytavern-chat-to-txt.py <path to the jsonl file> <path to where you want to save the TXT file> | |
These things are stripped from the chat: | |
- OOC chat like this: (OOC: bla bla) | |
- Text between brackets: [bla bla bla] | |
This script isn't going to produce a perfect transcript, but it's close enough. | |
""" | |
# Pattern for text that must not appear in the transcript:
#   1. OOC remarks such as "(OOC: ...)" or "[OOC: ...]" anywhere in a line.
#   2. Lines consisting entirely of a bracketed note, e.g. "[bla bla bla]".
# The second alternative is anchored with ^/$ under re.MULTILINE instead of
# consuming the surrounding newlines. Consuming them glued the previous and
# next lines together ("hello\n[x]\nworld" -> "helloworld") and made the
# second of two consecutive bracket lines unmatchable.
cleaner_re = re.compile(r'[(\[]OOC:.*?[\])]|^\[.*?\]$', re.MULTILINE)
def main():
    """Convert a SillyTavern ``jsonl`` chat file into a plain-text transcript.

    Command-line arguments (parsed with ``argparse``):
        filepath:   path to the ``jsonl`` chat file to read.
        output_txt: path of the TXT file to create.

    Every non-blank input line is parsed as one JSON document. Records that
    are JSON objects with a ``mes`` key are cleaned with ``cleaner_re``,
    stripped, and written as ``<name>:\\n<message>\\n\\n``. Records without
    ``mes`` and messages that are empty after cleaning are skipped.

    Exits with status 1 if the input file does not exist, the output's parent
    directory does not exist, or a line is not valid JSON.
    """
    parser = argparse.ArgumentParser(description='Convert SillyTavern jsonl files to TXT files for importing into the infinite context server.')
    parser.add_argument('filepath', help='The path to the jsonl file to parse')
    parser.add_argument('output_txt', help='The output TXT file to create.')
    args = parser.parse_args()

    input_jsonl = Path(args.filepath).expanduser().absolute().resolve()
    output_txt = Path(args.output_txt).expanduser().absolute().resolve()

    print('Converting chat:', input_jsonl)

    if not input_jsonl.exists():
        print('Input file does not exist:', input_jsonl)
        sys.exit(1)
    if not output_txt.parent.exists():
        print('Output parent directory does not exist:', output_txt.parent)
        sys.exit(1)

    chatlines = []
    # Read as UTF-8 explicitly instead of relying on the platform default.
    # Iterating the file splits on real newlines only; str.splitlines() would
    # also split on characters such as U+2028 that may appear unescaped inside
    # a JSON string and would cut a record in half.
    with input_jsonl.open(encoding='utf-8') as infile:
        for line_no, line in enumerate(infile, start=1):
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            try:
                chatlines.append(json.loads(line))
            except json.JSONDecodeError as err:
                print(f'JSON decode error on line {line_no}: {err}')
                sys.exit(1)

    formatted_chat = []
    for msg in chatlines:
        # Only JSON objects carrying a 'mes' field are chat messages; anything
        # else is skipped.
        if not isinstance(msg, dict) or 'mes' not in msg:
            continue
        clean_str = re.sub(cleaner_re, '', msg['mes']).strip()
        # Collapse every run of newlines to a single one: entries in the
        # output are separated by a blank line, so none may remain inside a
        # message.
        clean_str = re.sub(r'\n{2,}', '\n', clean_str).strip('\n')
        if not clean_str:
            continue
        # Fall back to a placeholder rather than crashing on a record that
        # has no 'name' key.
        formatted_chat.append({'name': msg.get('name', 'Unknown'), 'msg': clean_str})

    with open(output_txt, 'w', encoding='utf-8') as f:
        f.writelines(f"{msg['name']}:\n{msg['msg']}\n\n" for msg in formatted_chat)

    print(f'Converted {len(formatted_chat)} lines.')
    print('Saved to:', output_txt)
# Run the converter only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment