Created
April 25, 2012 01:55
-
-
Save ptgolden/2485460 to your computer and use it in GitHub Desktop.
Parse play-by-play data from basketballvalue.com for use in Gephi
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import argparse
import csv
import re
import sys
# Command-line interface: one positional input path plus an optional team
# filter.  Parsing happens at import time because this file is a flat script.
parser = argparse.ArgumentParser(
    description='Extract plays involving assists from raw play-by-play data')
parser.add_argument('input_file', metavar='FILE')
# The team code given here is interpolated into the assist regex below.
parser.add_argument(
    '--team',
    help='Three letter team name to search for, e.g. SAC, MEM, NYK')
args = parser.parse_args()
# --team is optional, so args.team may be None; interpolating None would bake
# the literal text "None" into the pattern and silently match nothing.  Fall
# back to any three-capital-letter team code in that case, and escape a
# user-supplied code so regex metacharacters in it are matched literally.
team_pattern = re.escape(args.team) if args.team else r'[A-Z]{3}'

# Verbose-mode regex for a play that credits an assist.  Shape implied by the
# pattern: "[TEAM a-b] Scorer <method>: ... Assist: Assister (".
# NOTE(review): exact play-by-play layout is not visible here -- confirm
# against a sample of the input data.
assist_pattern = re.compile(r'''
    \[%s\                                   # Scoring team
    (?P<t0_score>\d+)-(?P<t1_score>.*?)\]\  # Team scores
    (?P<scorer>(?:[A-Z]\.\ )?\w+)\          # Player who scored
    (?P<score_method>.*?)[:].*?             # Method of scoring
    Assist:\ (?P<assister>.*?)\ \(          # Assister
    ''' % team_pattern, re.X)
# Maps each player name to a 1-based integer id, assigned in order of first
# appearance.  Read again at the end of the script to write the node table.
players = {}


def get_player_idx(p):
    """Return the integer id for player name ``p``, allocating one if new.

    Ids start at 1 and grow by one for every previously unseen name, so
    repeated calls with the same name always return the same id.
    """
    if p not in players:
        players[p] = len(players) + 1
    return players[p]
def _open_csv(path, mode):
    """Open ``path`` the way the running interpreter's csv module expects.

    Python 2's csv module works on byte streams ('rb'/'wb'); Python 3's needs
    text streams opened with newline='' so it can manage line endings itself.
    ``mode`` is 'r' or 'w'.
    """
    if sys.version_info[0] < 3:
        return open(path, mode + 'b')
    # NOTE(review): text is decoded/encoded with the platform default encoding
    # on Python 3 -- confirm the encoding of the input data.
    return open(path, mode, newline='')


# Edge list: one tab-separated row per assisted score found in the input.
with _open_csv(args.input_file, 'r') as f, \
        _open_csv('assists.csv', 'w') as outfile:
    data = csv.reader(f, delimiter='\t')
    out = csv.writer(outfile, delimiter='\t')

    headers = ['SOURCE', 'TARGET']
    out.writerow(headers)

    for row in data:
        # csv.reader yields [] for a blank line (e.g. a trailing newline),
        # which cannot be unpacked into four columns; skip it.
        if not row:
            continue
        # Only the play text is used; a row with any other column count still
        # raises ValueError so malformed input is not silently ignored.
        _game, _line, _time_remaining, play = row

        assisted = assist_pattern.search(play)
        if not assisted:
            continue

        # NOTE(review): the scorer is written as SOURCE and the assister as
        # TARGET -- confirm that is the intended edge direction.
        p1 = get_player_idx(assisted.group('scorer'))
        p2 = get_player_idx(assisted.group('assister'))
        out.writerow([p1, p2])

# Node table: maps every id used in assists.csv back to the player name.
with _open_csv('players.csv', 'w') as players_outfile:
    players_out = csv.writer(players_outfile, delimiter='\t')
    players_out.writerow(['ID', 'LABEL'])
    # Sort by id so the file is deterministic on interpreters whose dicts do
    # not preserve insertion order.
    for player, pid in sorted(players.items(), key=lambda item: item[1]):
        players_out.writerow([pid, player])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment