Last active
August 16, 2018 15:23
-
-
Save tomtor/7870f05b3a9d9f6c2409a866203a9a72 to your computer and use it in GitHub Desktop.
Clone fishtest LTC PGN files to Google Drive
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import os | |
import sys | |
import subprocess | |
import re | |
import requests | |
import bz2 | |
from pymongo import MongoClient, ASCENDING, DESCENDING | |
from bson.binary import Binary | |
# Base URL of the fishtest server whose REST API is read by main().
# Alternative hosts, kept for reference:
#fish_host = 'http://localhost:6543'
#fish_host = 'http://94.198.98.239'
fish_host = 'http://tests.stockfishchess.org'

# Local MongoDB used as the bookkeeping store for what has been copied.
conn = MongoClient('localhost')
#conn.drop_database('fish_clone2')
db = conn['fish_clone2']
pgndb = db['pgns']  # one document per copied PGN file: {'run_id', 'nr_games'}
runs = db['runs']   # run documents as returned by the /api/get_run/ endpoint

# main() looks PGN files up by 'run_id' for every listed file, so index it.
# create_index is the supported replacement for the deprecated (and, in
# PyMongo 4, removed) ensure_index; it is a no-op if the index already exists.
pgndb.create_index([('run_id', ASCENDING)])
# Seconds to wait on the fishtest server before a request is abandoned.
_HTTP_TIMEOUT = 60

# A listing page shorter than this is treated as the last page.
_PAGE_SIZE = 100

# Time controls starting with a number >= 60 are copied for any thread count.
_TC_ANY_THREADS = re.compile(r'(?:[6-9][0-9]|[1-9][0-9][0-9])')

# Time controls starting with a number >= 20 are copied for runs using at
# least _MIN_THREADS threads.
_TC_MULTI_THREAD = re.compile(r'(?:[2-9][0-9]|[1-9][0-9][0-9])')
_MIN_THREADS = 3


def _fetch(path):
    """GET *path* from the fishtest server, raising on HTTP error statuses."""
    response = requests.get(fish_host + path, timeout=_HTTP_TIMEOUT)
    response.raise_for_status()
    return response


def _run_threads(run):
    """Return the run's thread setting, defaulting to 1 when it is absent."""
    return run['args'].get('threads', 1)


def _wants_run(run):
    """Return True when the run's time control/threads qualify for copying."""
    tc = run['args']['tc']
    if _TC_ANY_THREADS.match(tc):
        return True
    return (_TC_MULTI_THREAD.match(tc) is not None
            and int(_run_threads(run)) >= _MIN_THREADS)


def _upload_pgn(pgn_file, run_id, content, new_run):
    """Compress *content* to a temp file, hand it to up2.sh, then clean up.

    The flag semantics belong to the external up2.sh script (not visible
    here); the argument lists are exactly those of the original code: a run
    seen for the first time uploads with '-d fish_games -s <run_id>', a known
    run uploads with '-d <run_id>'.
    """
    tfname = pgn_file + '.pgn.bz2'
    tf = '/tmp/' + tfname
    if new_run:
        target = ['-d', 'fish_games', '-s', run_id]
    else:
        target = ['-d', run_id]
    try:
        with open(tf, 'wb') as out:
            out.write(bz2.compress(content))
        # Push our own buffered output out before the child process runs.
        sys.stdout.flush()
        subprocess.check_call(['up2.sh'] + target + ['-v', '-t', tfname, tf])
    finally:
        # Remove the temp file even when the upload fails.
        try:
            os.remove(tf)
        except FileNotFoundError:
            pass


def main():
    """Clone fishtest PGN files and their runs using the REST API.

    Pages through the server's PGN listing, skips runs whose time control
    does not qualify, uploads each new PGN file (bz2-compressed) through
    up2.sh, and records copied files and runs in the local MongoDB
    collections. Paging stops at a short page, or when a file is found that
    was already recorded before this invocation started. Prints a summary of
    this invocation and of the database totals.

    Raises:
        requests.HTTPError: the server answered with an error status.
        subprocess.CalledProcessError: up2.sh exited with a non-zero status.
    """
    skip = 0
    count = 0
    tot_games = 0
    in_sync = False
    loaded = set()    # PGN files copied during this invocation
    run_cache = {}    # run_id -> run document, to avoid repeated lookups

    while True:
        pgn_list = _fetch('/api/pgn_100/' + str(skip)).json()
        for pgn_file in pgn_list:
            if pgndb.find_one({'run_id': pgn_file}):
                print('Already copied: %s' % (pgn_file))
                if pgn_file not in loaded:
                    # Recorded by an earlier invocation: we have caught up.
                    in_sync = True
                    break
                continue

            run_id = pgn_file.split('-')[0]
            add = False
            if run_id in run_cache:
                run = run_cache[run_id]
            else:
                run = runs.find_one({'_id': run_id})
                if not run:
                    run = _fetch('/api/get_run/' + run_id).json()
                    add = True
                run_cache[run_id] = run

            if not _wants_run(run):
                #print('skipped %s: %s' % (pgn_file, run['args']['tc']))
                continue
            print('add %s: %s %s'
                  % (pgn_file, run['args']['tc'], _run_threads(run)))

            if add:
                print('New run: ' + run_id)
                runs.insert_one(run)

            pgn = _fetch('/api/pgn/' + pgn_file)
            # Count on the raw bytes: same result as counting in the decoded
            # text, without failing on PGN data that is not valid UTF-8.
            nr_games = pgn.content.count(b'PlyCount')
            tot_games += nr_games
            #print(str(nr_games) + ' games')

            _upload_pgn(pgn_file, run_id, pgn.content, add)

            pgndb.insert_one({'run_id': pgn_file, 'nr_games': nr_games})
            loaded.add(pgn_file)
            count += 1

        skip += len(pgn_list)
        if in_sync or len(pgn_list) < _PAGE_SIZE:
            break

    print('Copied:  %6d PGN files (%8d games)' % (count, tot_games))

    count = pgndb.count_documents({})
    nr_games = sum(p.get('nr_games', 0) for p in pgndb.find({}))
    print('Database:%6d PGN files (%8d games)' % (count, nr_games))
    count = runs.count_documents({})
    print('Database:%6d runs' % (count))
# Run the clone only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment