Created
April 5, 2012 15:27
-
-
Save redacted/2311951 to your computer and use it in GitHub Desktop.
Checks balance of StarCraft 2 races based on sc2-replays.net results
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urllib2 | |
from collections import defaultdict, namedtuple | |
import BeautifulSoup | |
import sys | |
import time | |
import socket | |
# The three playable factions, spelled in lower case because the replay
# pages are lower-cased before any matching is done.
Factions = ["protoss", "zerg", "terran"]

# One parsed replay:
#   factions - the players' factions, sorted and joined with "-"
#   winner   - faction of the winning player
#   map      - map name with spaces removed ("none" when missing)
#   duration - match length in seconds
Match = namedtuple("Match", "factions winner map duration")
def remove_non_ascii(s):
    """Return ``s`` without the characters whose ordinal is 128 or above.

    Some player names contain non-ASCII characters; they get in the way of
    the substring matching done on the pages, so they are simply dropped.
    """
    ascii_only = [ch for ch in s if ord(ch) <= 127]
    return "".join(ascii_only)
def get_player_factions(response, players):
    """Build a ``{player: faction}`` dict from the raw page text.

    The last line of ``response`` that contains both "rep_oppo" and a
    faction name is cut into chunks at every "<img src="; each chunk after
    the first is searched for one of ``players`` and one of ``Factions``.

    NOTE: the working names below are intentionally left unassigned when
    nothing matches, so the function raises UnboundLocalError in that case
    (calculate_results catches exactly that exception to skip the match).
    A chunk that matches no player or no faction reuses the value found in
    an earlier chunk.
    """
    for row in response.split("\n"):
        if "rep_oppo" in row and any(name in row for name in Factions):
            # keep overwriting: the last qualifying line wins
            opponents_row = row

    faction_by_player = {}
    for chunk in opponents_row.split("<img src=")[1:]:
        # the last matching player / faction in the chunk takes precedence
        named = [p for p in players if p in chunk]
        if named:
            owner = named[-1]
        raced = [f for f in Factions if f in chunk]
        if raced:
            race = raced[-1]
        faction_by_player[owner] = race
    return faction_by_player
def get_match_details(soup):
    """Extract the two player names and the map name from the page title.

    Only the text after the last ":" of the title is used.  Everything
    before the last "(" is treated as the players section, and the text
    between that "(" and the first following "," as the map name.

    Args:
        soup: parsed page; ``soup.find('title').contents[0]`` must be the
            title text.

    Returns:
        A ``([first_token, last_token], map_name)`` tuple, where the tokens
        are the first and last whitespace-separated words of the players
        section and ``map_name`` has its spaces removed.  ``map_name`` is
        "none" when the title carries no map.

    Raises:
        IndexError: if the players section is empty.
    """
    matchup = soup.find('title').contents[0].split(":")[-1].strip()

    # Split on the LAST "(" only.  A plain split("(") unpacked into two
    # names blows up with ValueError when the title has no "(" at all or
    # when a player name itself contains one.
    pieces = matchup.rsplit("(", 1)
    players_raw = pieces[0].split()
    map_section = pieces[1] if len(pieces) == 2 else ""

    map_name = map_section.split(',')[0].replace(" ", "")
    if not map_name:
        map_name = "none"
    return [players_raw[0], players_raw[-1]], map_name
def get_match_duration(response):
    """Return the match length in seconds, or None when it cannot be found.

    The first line containing "length:" whose text after the last "</div>"
    (with "<br/>" removed) splits into 2, 4 or 6 whitespace-separated tokens
    decides the result.  Tokens at even positions are read as integers:
    seconds only (2 tokens), minutes and seconds (4 tokens), or hours,
    minutes and seconds (6 tokens).  The odd-position tokens are ignored
    (presumably unit words).

    Useful for asking whether factions are stronger at different lengths.
    """
    # token count -> multiplier for each numeric token, in order
    weights_by_token_count = {
        2: (1,),
        4: (60, 1),
        6: (3600, 60, 1),
    }
    for row in response.split("\n"):
        if "length:" not in row:
            continue
        tail = row.split("</div>")[-1]
        tokens = tail.replace("<br/>", "").strip().split()
        weights = weights_by_token_count.get(len(tokens))
        if weights is not None:
            return sum(w * int(v) for w, v in zip(weights, tokens[::2]))
    return None
def get_winner(soup):
    """Return the first content item of the <span> inside the div with id "winner"."""
    winner_div = soup.find('div', {'id': 'winner'})
    name_span = winner_div.find('span')
    return name_span.contents[0]
def build_match_url_list(n_requested):
    """Collect at least ``n_requested`` unique match URLs, if available.

    Starts with the "all matches" listing and keeps requesting
    "&page=2", "&page=3", ... until enough URLs have been gathered.

    Args:
        n_requested: number of match URLs wanted.

    Returns:
        A list of unique match URLs.  It may hold more than
        ``n_requested`` entries (whole pages are added at a time) or fewer
        (the listing ran out, or a socket error interrupted the walk).
    """
    base_url = "http://www.sc2-replays.net/en/replays/&sort=time&time=&rel=0"
    page_increase = "&page="  # pagination starts at &page=2
    matches = get_matches(base_url)
    seen = set(matches)
    page_idx = 2
    try:
        while len(matches) < n_requested:
            print("Got {0} matches".format(len(matches)))
            new_url = base_url + page_increase + str(page_idx)
            # The same URL can show up on several pages; count it once.
            fresh = [u for u in get_matches(new_url) if u not in seen]
            if not fresh:
                # A page that contributes nothing new means we walked past
                # the end of the listing; without this the loop would
                # never terminate.
                break
            seen.update(fresh)
            matches.extend(fresh)
            page_idx += 1
            time.sleep(0.2)  # be gentle with the site
    except socket.error:
        # best effort: keep whatever was collected so far
        pass
    print("Got {0} matches (final)".format(len(matches)))
    return matches
def get_matches(url):
    """Download ``url`` and return the match-result URLs found on the page.

    Slightly hackish: every line is cut at double quotes and any piece that
    contains both the replay path and "-vs-" counts as a match URL.  The
    result holds no duplicates and is in no particular order.
    """
    html = urllib2.urlopen(url).read()
    found = set()
    for row in html.split("\n"):
        found.update(
            piece for piece in row.split('"')
            if "-vs-" in piece and "www.sc2-replays.net/en/replays/" in piece
        )
    return list(found)
def get_match_results(url):
    """Download and parse one match page.

    The page is stripped of non-ASCII characters and lower-cased before
    parsing, so all extracted names are lower case.

    Args:
        url: address of a match result page.

    Returns:
        A ``Match`` record, or None when the players do not use two
        different factions (mirror matches are ignored).

    Raises:
        KeyError: if the winner is not one of the players that were mapped
            to a faction.
        UnboundLocalError: propagated from get_player_factions when the
            page could not be matched.
    """
    r = remove_non_ascii(urllib2.urlopen(url).read()).lower()
    m = BeautifulSoup.BeautifulSoup(r)
    players, mapname = get_match_details(m)
    player_factions = get_player_factions(r, players)
    win_faction = player_factions[get_winner(m)]

    if len(set(player_factions.values())) <= 1:
        return None

    factions = "-".join(sorted(player_factions.values()))
    # Use the module-level Match type instead of building a new namedtuple
    # class on every call, so these records share their type with the ones
    # produced by load_saved_file.
    return Match(factions, win_faction, mapname, get_match_duration(r))
def calculate_results(n):
    """Main logic: gather match URLs and parse each of them.

    Asks build_match_url_list for ``n`` URLs, parses every one with
    get_match_results and returns the list of results that were truthy.
    Matches whose parsing raises UnboundLocalError are skipped
    (unicode names cause issues, FIXME).
    """
    urls = build_match_url_list(n)
    total = len(urls)
    collected = []
    for position, url in enumerate(urls, 1):
        try:
            print("({0}/{1} Working on {2}".format(position, total, url))
            parsed = get_match_results(url)
        except UnboundLocalError:
            continue
        if parsed:
            collected.append(parsed)
    return collected
def wins_by_faction(mr):
    """Print, for every matchup, how often each faction won.

    ``mr`` is a list of Match records.  For each distinct ``factions``
    value one line is printed: the matchup followed by a list of
    ``(winner, wins, share_of_wins)`` tuples, the share rounded to three
    decimals.  Nothing is returned.
    """
    winners_by_matchup = defaultdict(list)
    for match in mr:
        winners_by_matchup[match.factions].append(match.winner)

    print("\t Wins by faction \t")
    for matchup in winners_by_matchup:
        winners = winners_by_matchup[matchup]
        games = float(len(winners))
        summary = []
        for faction in set(winners):
            wins = winners.count(faction)
            summary.append((faction, wins, round(wins / games, 3)))
        print("{0} {1}".format(matchup, summary))
    print("\n")
def wins_by_time(mr):
    """Print how often each faction won, grouped by match length.

    ``mr`` is a list of Match records.  Durations are rounded to the
    nearest hundred seconds to form the groups; for each group (in
    ascending order) one line is printed: the rounded duration followed by
    a list of ``(winner, wins, share_of_wins)`` tuples, the share rounded
    to three decimals.  Nothing is returned.

    Matches whose duration is None are left out of the report.
    """
    winners_by_bucket = defaultdict(list)
    for match in mr:
        if match.duration is None:
            # get_match_duration yields None when a page has no parsable
            # length; round(None, -2) would raise TypeError and abort the
            # whole report.
            continue
        winners_by_bucket[round(match.duration, -2)].append(match.winner)

    print("\t Wins by time \t")
    for bucket in sorted(winners_by_bucket):
        winners = winners_by_bucket[bucket]
        games = float(len(winners))
        summary = []
        for faction in set(winners):
            wins = winners.count(faction)
            summary.append((faction, wins, round(wins / games, 3)))
        print("{0} {1}".format(bucket, summary))
    print("\n")
def save_to_file(path, mr_list):
    """Write the matches to a text file at ``path`` for later analysis.

    One line per match: factions, winner, map and duration separated by
    single spaces.  An existing file is overwritten.
    """
    template = "{0} {1} {2} {3}\n"
    with open(path, "w") as out:
        out.writelines(
            template.format(rec.factions, rec.winner, rec.map, rec.duration)
            for rec in mr_list
        )
def load_saved_file(path):
    """Load a match list previously written by save_to_file.

    Every line is split on whitespace; the first three tokens become the
    factions, winner and map of a Match and the fourth is converted to an
    int for the duration.  Returns the list of Match records.
    """
    with open(path) as source:
        token_rows = [row.split() for row in source]
    return [Match(t[0], t[1], t[2], int(t[3])) for t in token_rows]
if __name__ == '__main__':
    # Optional first command-line argument: how many matches to fetch.
    # Fall back to 25 when it is missing or not a number.  Only those two
    # failures are caught, so Ctrl-C and SystemExit are no longer swallowed
    # as they were by the former bare "except:".
    try:
        n_r = int(sys.argv[1])
    except (IndexError, ValueError):
        n_r = 25
    print("\t Getting {0} matches...".format(n_r))
    match_results = calculate_results(n=n_r)
    wins_by_faction(match_results)
    wins_by_time(match_results)
    # Keep the raw results so they can be re-analysed via load_saved_file.
    save_to_file("matches.txt", match_results)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment