Last active
January 18, 2016 17:03
-
-
Save rurban/2f1eabc751ebdc3cd056 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
# Hash candidate tennis-player names with SHA-256 and look for the digests
# among the values found in columns 12 and 13 of the betting data CSV.
#
# git clone https://github.com/BuzzFeedNews/2016-01-tennis-betting-analysis
# cd 2016-01-tennis-betting-analysis
use Digest::SHA 'sha256_hex';

# atp top100 2008-12-22
# (wget needs -O to write the page to singles.html; -o would only write the log)
# wget 'http://www.atpworldtour.com/en/rankings/singles?rankDate=2008-12-22' -O singles.html
# grep /overview singles.html | perl -lne'm{">(.*)<\/a>} && print $1' > players

# Candidate names: one per line of the "players" file produced above.
# Read directly instead of shelling out to cat, and fail loudly if it is missing.
my @n = do {
    open my $players, '<', 'players' or die "Cannot open players: $!";
    local $/;                       # slurp mode, restored when the block ends
    my $text = <$players>;
    close $players;
    split /\n/, defined $text ? $text : '';
};

# %c      - candidate strings that search() has already tried
# %loser  - occurrence count per distinct value of CSV column 12
# %winner - occurrence count per distinct value of CSV column 13
my (%c,%loser, %winner);

# get all the losers hashes and skip the grep
# highest prob (the 4 players with Bonferroni significance > 95%)
# 1. (58) f16cc81d239ad735c51cc71442cda44c4d1a9323eb41018314d228c80c352e50
# 2. (235) 33367d214715ab5f5e335cd67dbc90e62983b98e5278a4eadf39c3a18124509e
# 3. (293) 6702a5de750846f45a3d977f50023c1b20156c61949f2f407b9c5b71d7d93a18
# 4. (82) 9c92af8ca1b57024bd0a39b73db8be44b25bcde4115549cd80e7ef15fc3bd516
#
# vs e.g. Martin Vassallo Arguello:
# 57964fd78b1e7efda07dc8a1f3593342d48a35fe1f823b46eaf72ea9bac81afa
# Nikolay Davydenko
# 30fa25c6f80677171a61c42c757356640222277b0da62ad048a3b7bb9777bfd8
my $csv_path = 'data/anonymous_betting_data.csv';
open my $csv, '<', $csv_path or die "Cannot open $csv_path: $!";
# Read line by line (no need to hold the whole file in memory).
while (my $line = <$csv>) {
    # Strip the line ending so a value in the last column stays comparable
    # with a bare hex digest.
    $line =~ s/\r?\n\z//;
    my @s = split /,/, $line;
    # Short or truncated rows simply contribute nothing.
    $loser{ $s[12] }++  if defined $s[12];
    $winner{ $s[13] }++ if defined $s[13];
}
close $csv;
#print keys %loser;
# search(@candidates)
#
# Hashes every candidate string with SHA-256 and prints the candidate when
# its hex digest is a key of %loser and again when it is a key of %winner
# (no separator is added to the output). Candidates already tried are
# remembered in %c and skipped, so repeated calls only test new strings.
# Relies on the file-level %c, %loser and %winner hashes; returns nothing
# meaningful.
sub search {
    for my $name (@_) {
        next if exists $c{$name};
        $c{$name}++;
        my $hash = sha256_hex($name);
        #print "$hash\t$name\n-----\n";
        # %loser/%winner are keyed by digest with occurrence counts as values,
        # so membership must be tested on the digest, not on the plain name.
        print $name if exists $loser{$hash};
        print $name if exists $winner{$hash};
    }
}
# First pass: try the names exactly as they appear in the players list.
search @n;
#exit;
#
# Disabled experiments with alternative transliterations, kept for reference:
#my @x;
#for my $s ($n) {
# $_ = $s;
# s/AVY/AWY/g;
# push @x, $_;
# s/ay/ai/;
# push @x, $_;
#}
#push @n, @x;
#@x=();
#for my $s (@n) {
# $_ = $s;
# s/ikola././;
# push @x, $_;
# s/\.//;
# push @x, $_;
#}
#push @n, @x;

# Scratch list for each round of generated spellings. Every round is appended
# to @n, so later rounds also transform the variants of earlier rounds.
my @x;

# Round 1: all-lowercase and all-uppercase forms.
for my $s (@n) {
    push @x, lc $s, uc $s;
}
push @n, @x;

# Round 2: reordered forms. The greedy (.+) makes the text after the last
# space the "last" part; strings without a space are pushed unchanged.
@x = ();
for my $s (@n) {
    # "First Last" -> "Last First"
    my $swapped = $s;
    $swapped =~ s/^(.+) (.+)$/$2 $1/;
    push @x, $swapped;

    # ... then upper-case everything before the final space of that result.
    $swapped =~ s/^(.+) (.+)$/uc($1)." $2"/e;
    push @x, $swapped;

    # "First Last" -> "Last F"
    my $initial = $s;
    $initial =~ s/^(.).+ (.+)$/$2 $1/;
    push @x, $initial;

    # "First Last" -> "Last F."
    my $dotted = $s;
    $dotted =~ s/^(.).+ (.+)$/$2 $1./;
    push @x, $dotted;
}
push @n, @x;

# Round 3: every spelling again with a trailing newline appended.
@x = ();
for my $s (@n) {
    push @x, "$s\n";
}
push @n, @x;

# Second pass over the full list; search() skips whatever it already tried.
search @n;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment