Skip to content

Instantly share code, notes, and snippets.

@tomtor
Last active August 16, 2018 15:23
Show Gist options
  • Save tomtor/7870f05b3a9d9f6c2409a866203a9a72 to your computer and use it in GitHub Desktop.
Save tomtor/7870f05b3a9d9f6c2409a866203a9a72 to your computer and use it in GitHub Desktop.
Clone fishtest LTC PGN files to Google Drive
#!/usr/bin/env python3
import os
import sys
import subprocess
import re
import requests
import bz2
from pymongo import MongoClient, ASCENDING, DESCENDING
from bson.binary import Binary
# Base URL of the fishtest server whose REST API is cloned.  The commented
# alternatives are other hosts that were used at some point.
#fish_host = 'http://localhost:6543'
#fish_host = 'http://94.198.98.239'
fish_host = 'http://tests.stockfishchess.org'

# Local MongoDB holding the clone: one document per copied PGN file in
# 'pgns' and one document per run in 'runs'.
conn = MongoClient('localhost')
# Uncomment to wipe the local clone and start from scratch.
#conn.drop_database('fish_clone2')
db = conn['fish_clone2']
pgndb = db['pgns']
runs = db['runs']

# main() looks PGN files up by 'run_id', so keep that field indexed.
# create_index() replaces the deprecated ensure_index() (removed in
# PyMongo 4); it does not fail when the index already exists.
pgndb.create_index([('run_id', ASCENDING)])
def _is_long_time_control(run):
    """Return True when *run* should be cloned, judged by its time control.

    A run qualifies when its ``args.tc`` string begins with two digits in
    60-99 or three digits in 100-999, or when it begins with two digits in
    20-99 (or three digits) and ``args.threads`` is at least 3.
    """
    tc = run['args']['tc']
    # re.match() only matches at the start of the string, so both
    # alternatives of each pattern are tested against the leading digits.
    if re.match('^([6-9][0-9])|([1-9][0-9][0-9])', tc):
        return True
    return bool(
        re.match('^([2-9][0-9])|([1-9][0-9][0-9])', tc)
        and int(run['args']['threads']) >= 3
    )


def _upload_pgn(pgn_file, run_id, content, new_run):
    """Compress *content* with bz2, pass the file to ``up2.sh``, then delete it.

    Args:
        pgn_file: Name of the PGN file as listed by the server.
        run_id: Id of the run the PGN belongs to.
        content: Raw PGN bytes.
        new_run: True when the run was not yet stored locally; selects the
            ``-d fish_games -s <run_id>`` form of the upload command
            (presumably creating a per-run folder -- confirm against up2.sh).

    Raises:
        subprocess.CalledProcessError: if ``up2.sh`` exits with a non-zero
            status.
    """
    tfname = pgn_file + '.pgn.bz2'
    tf = '/tmp/' + tfname
    try:
        with open(tf, "wb") as file:
            file.write(bz2.compress(content))
        # Flush our own buffered output before the child process starts
        # writing, so the log lines stay in order.
        sys.stdout.flush()
        if new_run:
            cmd = ['up2.sh', '-d', 'fish_games', '-s', run_id, '-v', '-t', tfname, tf]
        else:
            cmd = ['up2.sh', '-d', run_id, '-v', '-t', tfname, tf]
        subprocess.check_call(cmd)
    finally:
        # Always clean up, including when the upload fails; previously a
        # failed upload left the compressed file behind in /tmp.
        try:
            os.remove(tf)
        except FileNotFoundError:
            pass


def main():
    """Clone a fishtest database with PGNs and runs with the REST API.

    Pages through ``/api/pgn_100/<skip>`` and, for every PGN file not yet
    recorded in the local ``pgns`` collection, looks up its run (local
    ``runs`` collection first, then ``/api/get_run/<run_id>``), skips runs
    whose time control does not qualify, downloads the PGN, uploads a
    bz2-compressed copy through ``up2.sh`` and records the file locally.

    Paging stops when a page holds fewer than 100 entries or when a file is
    found that was copied by an earlier invocation (the listing is presumably
    newest first, so everything after it is already cloned -- confirm).
    Finally prints totals for this invocation and for the local database.
    """
    skip = 0
    count = 0
    tot_games = 0
    in_sync = False
    loaded = set()    # PGN files copied during this invocation
    run_cache = {}    # run_id -> run document, avoids repeated lookups

    while True:
        pgn_list = requests.get(fish_host + '/api/pgn_100/' + str(skip)).json()
        for pgn_file in pgn_list:
            if pgndb.find_one({'run_id': pgn_file}):
                print('Already copied: %s' % (pgn_file))
                if pgn_file not in loaded:
                    # Copied by an earlier invocation: we have caught up.
                    in_sync = True
                    break
                # Copied moments ago in this invocation; just move on.
                continue

            run_id = pgn_file.split('-')[0]
            add = False
            if run_id not in run_cache:
                run = runs.find_one({'_id': run_id})
                if not run:
                    run = requests.get(fish_host + '/api/get_run/' + run_id).json()
                    add = True
                run_cache[run_id] = run
            else:
                run = run_cache[run_id]

            if not _is_long_time_control(run):
                #print('skipped %s: %s' % (pgn_file, run['args']['tc']))
                continue
            print('add %s: %s %s' % (pgn_file, run['args']['tc'], run['args']['threads']))

            if add:
                print('New run: ' + run_id)
                # NOTE: insert()/count() are legacy PyMongo calls, kept so the
                # script keeps working with the driver version it was written for.
                runs.insert(run)

            pgn = requests.get(fish_host + '/api/pgn/' + pgn_file)
            # Count on the raw bytes: same result as counting in the decoded
            # text for valid UTF-8, but cannot fail on undecodable bytes.
            nr_games = pgn.content.count(b'PlyCount')
            tot_games += nr_games
            #print(str(nr_games) + ' games')

            _upload_pgn(pgn_file, run_id, pgn.content, add)

            pgndb.insert(dict(run_id=pgn_file, nr_games=nr_games))
            loaded.add(pgn_file)
            count += 1

        skip += len(pgn_list)
        if in_sync or len(pgn_list) < 100:
            break

    print('Copied: %6d PGN files (%8d games)' % (count, tot_games))
    count = pgndb.count()
    nr_games = sum(p.get('nr_games', 0) for p in pgndb.find({}))
    print('Database:%6d PGN files (%8d games)' % (count, nr_games))
    count = runs.count()
    print('Database:%6d runs' % (count))
# Start the clone only when this file is executed as a script; importing it
# as a module must not trigger the download.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment