Last active
August 23, 2023 23:34
-
-
Save CyberRex0/d481c4c2be6dc47fee4b50cefadf2074 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Misskey Note Dump Tool for Meilisearch | |
# You need to install psycopg2, pytz and requests from pip
# | |
# python3 dump_misskey_note_data.py --db-host HOST --db-user USER --db-pass PASS --db-name NAME --db-port PORT \ | |
# --ms-base-url MEILISEARCH_BASE_URL --dump-per PER_NUM --master-key MASTER_KEY --index INDEX_NAME | |
# | |
# [Required Parameters] | |
# --db-user, --db-name | |
# | |
# [Optional Parameters] | |
# --db-host, --db-port, --db-pass, --ms-base-url, --dump-per, --master-key, --index | |
import argparse
import datetime
import time

import psycopg2
import psycopg2.extras
import pytz
import requests
class ArgT:
    """Typed view of the parsed command-line options.

    The class only carries annotations (no defaults, no behaviour); it exists
    so that attribute access on the parsed arguments can be type-checked.
    """

    # PostgreSQL connection settings.
    db_host: str
    db_port: int
    db_name: str
    db_user: str
    db_pass: str

    # Meilisearch target.
    ms_base_url: str
    master_key: str
    index: str

    # Number of notes handled per batch.
    dump_per: int
# Command-line interface. Only --db-user and --db-name are mandatory; every
# other option falls back to the default given here.
parser = argparse.ArgumentParser(
    description='Dump Misskey notes from PostgreSQL into a Meilisearch index.'
)
parser.add_argument('--db-host', type=str, default='localhost', required=False,
                    help='PostgreSQL host (default: %(default)s)')
# Required option: a default could never take effect, so none is declared.
parser.add_argument('--db-user', type=str, required=True,
                    help='PostgreSQL user name')
parser.add_argument('--db-port', type=int, default=5432, required=False,
                    help='PostgreSQL port (default: %(default)s)')
parser.add_argument('--db-pass', type=str, default='', required=False,
                    help='PostgreSQL password (default: empty)')
parser.add_argument('--db-name', type=str, required=True,
                    help='PostgreSQL database name')
parser.add_argument('--ms-base-url', type=str, default='http://localhost:7700', required=False,
                    help='Meilisearch base URL (default: %(default)s)')
parser.add_argument('--dump-per', type=int, default=10000, required=False,
                    help='number of notes per batch (default: %(default)s)')
parser.add_argument('--master-key', type=str, default='', required=False,
                    help='key sent as a Bearer token; omitted when empty')
parser.add_argument('--index', type=str, default='misskey', required=False,
                    help='Meilisearch index name (default: %(default)s)')

# Parse straight into an ArgT instance so the annotation describes the real
# object instead of a plain argparse.Namespace.
args: ArgT = parser.parse_args(namespace=ArgT())
def unixEpoch(dt):
    """Return *dt* as whole milliseconds since the Unix epoch (UTC).

    Args:
        dt: A ``datetime.datetime``. Aware values are converted to UTC; a
            naive value is interpreted as local time by ``astimezone``.

    Returns:
        int: Milliseconds elapsed since 1970-01-01T00:00:00Z, with any
        sub-millisecond remainder floored.
    """
    utc = datetime.timezone.utc
    epoch = datetime.datetime(1970, 1, 1, tzinfo=utc)
    # Integer timedelta division keeps the value exact; going through
    # total_seconds() * 1000 can pick up binary floating-point noise.
    return (dt.astimezone(utc) - epoch) // datetime.timedelta(milliseconds=1)
# ---------------------------------------------------------------------------
# Dump loop: read the exportable notes page by page from PostgreSQL and upload
# each page to Meilisearch as one batch of documents.
# ---------------------------------------------------------------------------

# Rows to export: public/home notes that are not renotes. The same predicate
# drives the progress total so the percentage can actually reach 100%.
NOTE_FILTER = (
    '("note"."visibility" = \'public\' OR "note"."visibility" = \'home\') '
    'AND "note"."renoteId" IS NULL'
)
# Only the columns that end up in the uploaded document are fetched.
NOTE_COLUMNS = '"id", "text", "createdAt", "userId", "userHost", "channelId", "cw", "tags"'

MAX_POST_ATTEMPTS = 3       # upload attempts per batch before aborting
RETRY_DELAY_SECONDS = 2     # base delay; multiplied by the attempt number
POST_TIMEOUT_SECONDS = 120  # do not hang forever on an unresponsive server

db = psycopg2.connect(
    host=args.db_host,
    user=args.db_user,
    password=args.db_pass,
    database=args.db_name,
    port=args.db_port,
    cursor_factory=psycopg2.extras.DictCursor
)

lmt = args.dump_per
ofs = 0
notes = []
total_notes = 0

# Keyword arguments shared by every HTTP request.
global_reqargs = {'headers': {}, 'timeout': POST_TIMEOUT_SECONDS}
if args.master_key:
    global_reqargs['headers']['Authorization'] = 'Bearer ' + args.master_key


def _note_to_document(note):
    """Build the document uploaded for one row of the "note" table."""
    return {
        'id': note['id'],
        'text': note['text'],
        'createdAt': unixEpoch(note['createdAt']),
        'userId': note['userId'],
        'userHost': note['userHost'],
        'channelId': note['channelId'],
        'cw': note['cw'],
        'tags': note['tags'],
    }


def _post_documents(documents):
    """Upload one batch of documents, retrying a bounded number of times.

    Any response other than HTTP 202 and any ``requests`` transport error
    counts as a failed attempt.

    Raises:
        SystemExit: when every attempt failed, so the script stops instead of
            retrying the same batch forever.
    """
    url = f'{args.ms_base_url}/indexes/{args.index}/documents?primaryKey=id'
    for attempt in range(1, MAX_POST_ATTEMPTS + 1):
        try:
            r = requests.post(url, json=documents, **global_reqargs)
        except requests.RequestException as exc:
            print(f'Error ({exc})')
        else:
            if r.status_code == 202:
                return
            print(f'Error ({r.status_code})')
            print(r.text)
        if attempt < MAX_POST_ATTEMPTS:
            time.sleep(RETRY_DELAY_SECONDS * attempt)
    raise SystemExit(f'Giving up after {MAX_POST_ATTEMPTS} failed upload attempts')


try:
    with db.cursor() as cur:
        cur.execute(f'SELECT COUNT(*) FROM "public"."note" WHERE {NOTE_FILTER}')
        total_notes = cur.fetchone()[0]

    while True:
        with db.cursor() as cur:
            # ORDER BY makes LIMIT/OFFSET paging deterministic; without it the
            # database may return overlapping or incomplete pages.
            cur.execute(
                f'SELECT {NOTE_COLUMNS} FROM "public"."note" WHERE {NOTE_FILTER} '
                'ORDER BY "note"."id" LIMIT %s OFFSET %s',
                (lmt, ofs),
            )
            qnotes = cur.fetchall()

        if not qnotes:
            break

        # Build the batch from scratch each time so a failed upload can never
        # leave stale documents behind for the next request.
        notes = [_note_to_document(note) for note in qnotes]
        _post_documents(notes)

        # Advance by what was really read, then report progress afterwards so
        # the final page prints 100%.
        ofs += len(qnotes)
        if total_notes:
            print(f'{(min(ofs, total_notes) / total_notes) * 100:.2f}%')

    print('**** Complete ****')
finally:
    # Release the connection even when the dump aborts part-way.
    db.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment