Skip to content

Instantly share code, notes, and snippets.

@toddbranch
Last active August 29, 2015 14:19
Show Gist options
  • Select an option

  • Save toddbranch/8b78531e1e4d6d347d9b to your computer and use it in GitHub Desktop.

Select an option

Save toddbranch/8b78531e1e4d6d347d9b to your computer and use it in GitHub Desktop.
NFL draft scraper and SQL schema / import
# Schema for the draft table: one row per pick, all seasons in one table.
CREATE TABLE IF NOT EXISTS `draft` (
    `year`     SMALLINT(4) UNSIGNED,  # draft season
    `pick`     SMALLINT(3) UNSIGNED,  # pick number as scraped
    `name`     VARCHAR(255),          # player name
    `team`     VARCHAR(255),          # drafting team
    `position` VARCHAR(5),            # position abbreviation
    `school`   VARCHAR(255)           # player's school
);
# Commands to import the picks.
# Create the draft table before running this, and change the file path
# below to wherever your results.csv was written.
#
# TRUNCATE empties the table first, so every run replaces all existing rows.
TRUNCATE TABLE draft;
# The column list pins each CSV field to a named column, so the import does
# not depend on the physical column order of the table. Backslash is the
# escape character (MySQL's default, spelled out here), which lets a field
# carry a literal comma written as "\,".
LOAD DATA INFILE '/home/toddbranch/php/nfl_draft/results.csv'
INTO TABLE draft
FIELDS TERMINATED BY ',' ESCAPED BY '\\'
LINES TERMINATED BY '\n'
(year, pick, name, team, position, school);
# Usage: php scraper.php > results.csv
#
# Scrapes every season from 1967 through 2014 from www.nfl.com/draft/history/fulldraft?season=YEAR
<?php
/**
 * One draft selection, built from a single <tr> of a draft-results table.
 *
 * Cell layout read by the constructor: td[0] = pick number, td[1] = team,
 * td[2] = player name, td[3] = position, td[4] = school. Team and name are
 * taken from the first <a> inside their cell when one is present.
 */
class Pick
{
    private $year;
    private $pick;
    private $name;
    private $team;
    private $position;
    private $school;

    /**
     * Reads the pick's fields out of a table row.
     *
     * A missing cell produces an empty string for that field instead of a
     * fatal "member function on null" error, so one malformed row cannot
     * abort a long scrape. Callers that want to drop such rows should check
     * the row's <td> count before constructing a Pick.
     *
     * @param DOMElement $row  Table row holding the pick's <td> cells.
     * @param int|string $year Draft season the row belongs to; stored as given.
     */
    public function __construct($row, $year)
    {
        $tds = $row->getElementsByTagName('td');

        // item() returns null for an out-of-range index; the helpers below
        // turn that into an empty string.
        $this->pick = self::cellText($tds->item(0));
        $this->name = self::linkText($tds->item(2));
        $this->team = self::linkText($tds->item(1));
        $this->position = self::cellText($tds->item(3));
        $this->school = self::cellText($tds->item(4));
        $this->year = $year;
    }

    /**
     * Text of a node with whitespace runs collapsed to one space and the
     * ends trimmed, so line breaks in the markup cannot split an output record.
     *
     * @param DOMNode|null $node
     * @return string Cleaned text, or '' when the node is missing.
     */
    private static function cellText($node)
    {
        if ($node === null) {
            return '';
        }

        return trim(preg_replace('/[ \t\r\n]+/', ' ', $node->textContent));
    }

    /**
     * Text of the first <a> inside a cell, falling back to the cell's own
     * text when the cell contains no link.
     *
     * @param DOMElement|null $cell
     * @return string Cleaned text, or '' when the cell is missing.
     */
    private static function linkText($cell)
    {
        if ($cell === null) {
            return '';
        }

        $link = $cell->getElementsByTagName('a')->item(0);

        return self::cellText($link !== null ? $link : $cell);
    }

    /**
     * Fields in output order: year, pick, name, team, position, school.
     *
     * @return array
     */
    private function toArray()
    {
        return [
            'year' => $this->year,
            'pick' => $this->pick,
            'name' => $this->name,
            'team' => $this->team,
            'position' => $this->position,
            'school' => $this->school
        ];
    }

    /**
     * Echoes the pick as one comma-separated line terminated by "\n".
     *
     * Commas and backslashes inside a field are backslash-escaped so they
     * are not mistaken for field separators. Backslash is the default escape
     * character of MySQL's LOAD DATA. Fields without either character are
     * written unchanged.
     */
    public function printCSV()
    {
        $fields = [];
        foreach ($this->toArray() as $value) {
            $fields[] = strtr((string) $value, ['\\' => '\\\\', ',' => '\\,']);
        }

        echo implode(",", $fields) . "\n";
    }

    /**
     * Echoes the pick as one tab-separated line terminated by "\n".
     */
    public function printReadable()
    {
        echo implode("\t", $this->toArray()) . "\n";
    }

    /**
     * Tells whether this pick plays the given position.
     *
     * @param string $position Position abbreviation to compare against.
     * @return bool True when it matches the stored position exactly.
     */
    public function isPosition($position)
    {
        // Strict string comparison: avoids PHP's loose "==" juggling.
        return (string) $position === $this->position;
    }
}
// Scrape every season's draft page and print one CSV line per pick to stdout.

// Real-world HTML is rarely valid; keep libxml's parse complaints in its
// internal buffer instead of letting them be printed as PHP warnings.
libxml_use_internal_errors(true);
$dom = new DOMDocument;

// Inclusive range of draft seasons to fetch.
$firstSeason = 1967;
$lastSeason = 2014;

for ($i = $firstSeason; $i <= $lastSeason; $i++) {
    $page = file_get_contents('http://www.nfl.com/draft/history/fulldraft?season=' . $i);
    if ($page === false || $page === '') {
        // Report on stderr so the CSV on stdout stays clean, then move on
        // rather than passing an empty document to the parser.
        file_put_contents('php://stderr', "Skipping season $i: page could not be downloaded\n");
        continue;
    }

    $dom->loadHTML($page);
    // The internal error buffer grows with every page unless it is emptied.
    libxml_clear_errors();

    // Every <table> on the page is treated as one block of picks.
    foreach ($dom->getElementsByTagName('table') as $round) {
        $rowIndex = 0;
        foreach ($round->getElementsByTagName('tr') as $row) {
            // The first two rows of each table are skipped (header rows).
            if ($rowIndex++ < 2) {
                continue;
            }
            // A pick row needs all five cells that Pick reads; anything
            // shorter is layout or filler and would produce a broken record.
            if ($row->getElementsByTagName('td')->length < 5) {
                continue;
            }

            $pick = new Pick($row, $i);
            $pick->printCSV();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment