Created
January 11, 2012 20:35
-
-
Save johanlaidlaw/1596626 to your computer and use it in GitHub Desktop.
Scraper for transfermarkt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
include_once('../simple_html_dom.php'); | |
/**
 * Scrape a player transfer page and print its contents to standard output.
 *
 * Two sections are printed:
 *  1. For every <tr> of the second element with class "tabelle_spieler",
 *     the text of that row's second <td>, one per line (an empty line when
 *     the row has no second <td>).
 *  2. A "Transfer History" section: for every <tr> of the second element
 *     with class "standard_tabelle", the text of columns 0, 1, 3 and 5,
 *     each followed by " - ".
 *
 * When the page cannot be loaded, or one of the expected tables is not
 * present, an E_USER_WARNING is raised and that part is skipped instead of
 * aborting with a fatal "member function on null/bool" error.
 *
 * @param string $url Address of the page to scrape.
 * @return void
 */
function scraping($url) {
    // Create the HTML DOM. Presumably file_get_html() yields a falsy value
    // when the page cannot be fetched or parsed (simple_html_dom) - confirm.
    $html = file_get_html($url);
    if (!$html) {
        trigger_error("scraping(): could not load or parse '$url'", E_USER_WARNING);
        return;
    }

    // There are two elements with class="tabelle_spieler"; we want the
    // second one, therefore index 1 (indexes start at 0).
    $table = $html->find('.tabelle_spieler', 1);
    if ($table) {
        foreach ($table->find('tr') as $row) {
            // Rows without a second <td> (e.g. header rows) still produce
            // an empty line, exactly as before, but no longer trigger a
            // "property of non-object" notice.
            $cell = $row->find('td', 1);
            echo ($cell ? $cell->plaintext : '')."\n";
        }
    } else {
        trigger_error("scraping(): second '.tabelle_spieler' table not found", E_USER_WARNING);
    }

    $transfer_table = $html->find('.standard_tabelle', 1);
    // Debug output of the lookup result's type; kept so that the printed
    // output stays identical to what existing consumers see.
    echo gettype($transfer_table);
    if ($transfer_table) {
        // Column positions (0-based) of the cells that are printed.
        $index_to_show = array(0, 1, 3, 5);
        echo "\n\nTransfer History";
        foreach ($transfer_table->find('tr') as $tr) {
            foreach ($tr->find('td') as $index => $td) {
                // Strict comparison: cell positions are integers.
                if (in_array($index, $index_to_show, true)) {
                    // Removes every space from the cell text.
                    // NOTE(review): the search string may have been meant
                    // to be a non-breaking space / "&nbsp;" - verify.
                    echo str_replace(" ", "", $td->plaintext)." - ";
                }
            }
            echo "\n";
        }
    } else {
        trigger_error("scraping(): second '.standard_tabelle' table not found", E_USER_WARNING);
    }

    // Clean up memory - reached on every path once the DOM has been loaded.
    $html->clear();
    unset($html);
}
// Entry point: scrape the transfer page of the example player.
$playerTransfersUrl = 'http://www.transfermarkt.co.uk/en/marcus-hahnemann/transfers/spieler_4140.html';
scraping($playerTransfersUrl);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment