Created
June 13, 2011 22:51
-
-
Save rileydutton/1023932 to your computer and use it in GitHub Desktop.
Website Scraper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Scrapes one river-gage month table and prints the newest reading
// (elevation, percent full, release) plus the averages row
// (average elevation, average release), one labelled value per line.

// Everything below depends on phpQuery, so stop right away if it cannot be loaded.
require_once 'phpQuery-onefile.php';

$file = 'http://www.usbr.gov/lc/region/g4000/riverdata/gage-month-table.cfm?GAGE=3';

// Load the remote page as phpQuery's current document; the pq() calls below query it.
phpQuery::newDocumentFileHTML($file);

// Row layout this script relies on, counting back from the row matched by
// "table tr:last-child": one row back is treated as the averages row (its
// values are read from <th> cells) and two rows back as the newest data row.
$lastTr = pq("table tr:last-child")->prev("tr")->prev("tr");

// Positional access goes through eq(), phpQuery's index-based filter.
// NOTE(review): array-style access ($obj[1]) on a phpQuery object appears to be
// treated as a selector lookup rather than "n-th element" - which would explain
// why direct indexing never worked here. Confirm against the bundled phpQuery.
$testTd = $lastTr->eq(0)->find("td");

// If the newest row has no elevation value yet, fall back to the row before it.
if (trim($testTd->eq(1)->text()) === "") {
    $lastTr = $lastTr->prev('tr');
}

// Without a data row there is nothing meaningful to report.
if ($lastTr->size() === 0) {
    exit("No data row found in " . $file . PHP_EOL);
}

// Cell positions within the data row: 1 = elevation, 3 = percent full, 4 = release.
// eq() on a missing cell yields an empty set whose text() is "", so every
// variable is always defined even when the row is shorter than expected.
$infoTd    = $lastTr->eq(0)->find("td");
$elevation = trim($infoTd->eq(1)->text());
$pctfull   = trim($infoTd->eq(3)->text());
$release   = trim($infoTd->eq(4)->text());

// The averages row keeps its values in <th> cells: 1 = elevation, 4 = release.
$avgTr        = pq("table tr:last-child")->prev("tr");
$avgTd        = $avgTr->eq(0)->find("th");
$avgelevation = trim($avgTd->eq(1)->text());
$avgrelease   = trim($avgTd->eq(4)->text());

// One "Label: value" pair per line so the output stays readable and parseable.
print("Elevation: " . $elevation . PHP_EOL);
print("Percent Full: " . $pctfull . PHP_EOL);
print("Release: " . $release . PHP_EOL);
print("Avg. Elevation: " . $avgelevation . PHP_EOL);
print("Avg. Release: " . $avgrelease . PHP_EOL);
?>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment