Skip to content

Instantly share code, notes, and snippets.

@jansanchez
Forked from edran/check_rep.py
Created November 22, 2017 04:01
Show Gist options
  • Save jansanchez/0bb7cc14f2845e1beed4575bb3ba701b to your computer and use it in GitHub Desktop.
Save jansanchez/0bb7cc14f2845e1beed4575bb3ba701b to your computer and use it in GitHub Desktop.
Runs through a directory of StarCraft replays and outputs all the corrupt ones.
# This script does its best to filter out bad replays.
# Pass it a directory and it will read every '.rep' file beneath it and print
# the paths of the corrupt ones to stdout.
from __future__ import print_function
from pyreplib import replay # https://github.com/HearthSim/pyreplib/
from itertools import repeat
from multiprocessing import Pool, Process, Pipe
from multiprocessing.pool import ThreadPool
import os
import sys
import datetime
release = datetime.datetime(2008, 11, 25) # release date of 1.16
def analyze(repname, conn):
    """Parse one replay and report over ``conn`` whether it is rejected.

    A replay is rejected when its date is earlier than the module-level
    ``release`` cutoff, or when its engine name (lower-cased) is not
    "broodwar".

    Args:
        repname: Path of the replay file, handed to ``replay.Replay``.
        conn: Object with a ``send`` method. It receives ``repname`` first
            when the replay is rejected, and always receives a final
            ``None``.
    """
    parsed = replay.Replay(repname)
    rejected = (parsed.date < release
                or parsed.engine_name.lower() != "broodwar")
    # A rejected replay is announced by name ahead of the trailing None, so
    # the first message on the connection carries the verdict.
    outgoing = [repname, None] if rejected else [None]
    for message in outgoing:
        conn.send(message)
def filterfiles(args):
    """Return the full path of a replay file, or None for anything else.

    Takes a single tuple so that it can be passed straight to ``Pool.map``.

    Args:
        args: ``(root, fname)`` pair: the containing directory and the bare
            file name.

    Returns:
        ``os.path.join(root, fname)`` when ``fname`` ends with ``.rep`` and
        does not contain ``.lock``; otherwise ``None``.
    """
    root, fname = args
    # Match on the real extension: a plain substring test would also accept
    # names such as "notes.report.txt" or "game.rep.bak".
    if fname.endswith('.rep') and '.lock' not in fname:
        return os.path.join(root, fname)
    return None
# Collect the path of every replay file under the directory given as the
# first command-line argument.
# NOTE(review): this runs at import time with no ``__main__`` guard; on
# platforms where multiprocessing starts children by re-importing the main
# module this would re-run the whole script in each child -- confirm the
# target platform or add a guard.
pool = Pool()
flst = []
try:
    for root, dirs, files in os.walk(sys.argv[1]):
        candidates = pool.map(filterfiles, zip(repeat(root), files))
        flst.extend(path for path in candidates if path is not None)
finally:
    # The filtering workers are not needed past this point. Shut them down
    # explicitly instead of leaving them idle for the rest of the run.
    pool.close()
    pool.join()
# analyze sometimes segfaults, so a Pool will break
# Instead, just start a new process for each replay
def tpfunc(repname):
    """Check one replay in a throwaway child process; print it if it is bad.

    ``analyze`` is run in its own ``Process`` so that a crash while parsing
    takes down only that child. The replay path is printed when the child
    reports it as rejected, or when the child produced no message at all.

    Args:
        repname: Path of the replay file to check.
    """
    conn, send = Pipe()
    try:
        t = Process(target=analyze, args=(repname, send))
        t.start()
        t.join()
        # The child has already exited by now. Keep the 5 s grace period
        # after a clean exit, but do not stall on a child that died
        # abnormally: just check whether it managed to send something first.
        timeout = 5 if t.exitcode == 0 else 0
        if conn.poll(timeout):
            res = conn.recv()
            if res is not None:
                print(res)
        else:
            # No message at all: treat the replay as corrupt.
            print(repname)
    finally:
        # Release both pipe ends promptly; one pair is created per replay.
        conn.close()
        send.close()
# Threadpool makes sure we don't accidentally forkbomb ourselves
tp = ThreadPool()
try:
    # Each worker thread drives one child process at a time via tpfunc.
    tp.map(tpfunc, flst)
finally:
    # Pools hold internal resources that should be released explicitly;
    # leaving them to interpreter shutdown can hang on finalization.
    tp.close()
    tp.join()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment