Skip to content

Instantly share code, notes, and snippets.

@jaketoolson
Created October 31, 2012 15:55
Show Gist options
  • Save jaketoolson/3987852 to your computer and use it in GitHub Desktop.
Save jaketoolson/3987852 to your computer and use it in GitHub Desktop.
PHP Sample Code3
// Jake Toolson
// This code is used to scrape raw data from websites which is then stored and used at a later time.
// This code incorporates the use of curl as well as the AWESOME phpQuery DOM library.
// Errors are handled with try/catch blocks and logged with a custom logger class.
// This script is meant to be run from the command line.
ob_start();

// Reuse a start time or logger created by earlier bootstrap code when one exists;
// otherwise create them here so the completion log line at the bottom always has
// a valid start time and a logger object to write to.
if ( ! isset($timeStart))
{
    $timeStart = microtime(true);
}
if ( ! isset($log))
{
    $log = logger::getInstance();
}

try
{
    $urls = array();

    // The scraped link and the station name both come from outside this script,
    // so they are bound as parameters instead of being concatenated into the SQL.
    // NOTE(review): assumes $dbh is a PDO connection (the original used a
    // PDO-style exec() call) -- confirm against the bootstrap code.
    $update = $dbh->prepare(
        'UPDATE tbl_ferc_eqr_companies_2011_q4 SET dl_link = :dl_link WHERE name = :name'
    );
    if ($update === false)
    {
        throw new Exception('Could not prepare the dl_link update statement.');
    }

    foreach ($stations as $station)
    {
        $eachTimeStart = microtime(true);
        $stationName = urldecode($station);

        echo $processing;
        flushIt();

        /**
         * Begin scraping data from the source.
         */
        // The source URL was redacted by the author in the published sample.
        $url = (private);
        $ch = new curlr($url);
        $html = $ch->return_doc();
        $doc = phpQuery::newDocument($html);
        $option = $doc->find("table tr:nth-child(2) td:nth-child(5)");

        // Read the href of the link inside that table cell and collect it.
        $url = pq($option)->find('a')->attr('href');
        $urls[] = $url;

        // As in the original, an update that touches no rows is reported as a failure.
        $updated = $update->execute(array(':dl_link' => $url, ':name' => $stationName))
            && $update->rowCount() > 0;

        if ( ! $updated)
        {
            echo "Insert Failed on ".$stationName.".\n";
            flushIt();
        }
        else
        {
            $eachTimeEnd = round(microtime(true)-$eachTimeStart,6);
            echo "Insert Successful. Took ".$eachTimeEnd." seconds.\n";
            flushIt();
        }
    }

    print_r($urls);
}
catch(Exception $e)
{
    // handleErrors() here is a global function that is not defined in the visible source.
    handleErrors("",$e->getMessage(),__FILE__, __LINE__);
}

// Measure the total run time here so it is defined even when the loop never ran
// or an exception cut it short.
$timeEnd = round(microtime(true)-$timeStart,6);
$log->write('Scrape "'.__FILE__.'"-Completed. Took '.$timeEnd.' seconds.', 'NOTICE');
ob_end_flush();
/**
 * @Class logger
 *
 * Singleton file logger. Obtain the shared instance with getInstance(), assign
 * the target file through the `logfile` property, then call write(). The first
 * property assignment also installs this object as the handler for E_WARNING
 * errors and for uncaught exceptions.
 *
 * @example usage
 * $log = logger::getInstance();
 * $log->logfile = 'errors.log';
 * $log->write('An error has occured','error', __FILE__, __LINE__);
 *
 */
class logger
{
    /*** Declare instance ***/
    private static $instance = NULL;

    /**
     * Path of the file that write() appends to. Declared explicitly (instead of
     * being created on the fly inside __set) because dynamic properties are
     * deprecated as of PHP 8.2.
     */
    private $logfile = NULL;

    /*** True once the error/exception handlers have been installed ***/
    private $handlersRegistered = false;

    /**
     *
     * Constructor is private to stop direct instantiation; use getInstance().
     *
     */
    private function __construct()
    {
    }

    /**
     *
     * Setter. Only `logfile` may be assigned.
     *
     * @access public
     * @param string $name
     * @param mixed $value
     * @throws Exception when $name is anything other than 'logfile'
     *
     */
    public function __set($name, $value)
    {
        // Install the handlers once; every assignment now passes through here,
        // and re-registering would only stack duplicate handlers.
        if ( ! $this->handlersRegistered)
        {
            set_error_handler(array($this, 'handleErrors'), E_WARNING);
            set_exception_handler(array($this, 'handleExceptions'));
            $this->handlersRegistered = true;
        }

        switch ($name)
        {
            case 'logfile':
                // The path is stored without validation (the original existence /
                // writability check was disabled by the author).
                $this->logfile = $value;
                break;

            default:
                throw new Exception("$name cannot be set");
        }
    }

    /**
     *
     * Getter, so `$log->logfile` stays readable from outside now that the
     * property is declared private.
     *
     * @access public
     * @param string $name
     * @return mixed the logfile path for 'logfile', NULL for any other name
     *
     */
    public function __get($name)
    {
        return $name === 'logfile' ? $this->logfile : NULL;
    }

    /**
     *
     * Append one line to the logfile.
     *
     * @access public
     * @param string $message
     * @param string $type Label for the entry; it is upper-cased in the output
     * @param string $file The filename that caused the error
     * @param int $line The line that the error occurred on
     * @return int|false number of bytes written, false otherwise (including
     *                   when no logfile has been set yet)
     *
     */
    public function write($message, $type=null, $file=null, $line=null)
    {
        // Without a target there is nothing to write to; report failure instead
        // of handing an empty path to file_put_contents().
        if ($this->logfile === NULL || $this->logfile === '')
        {
            return false;
        }

        // Cast $type so the NULL default does not reach strtoupper().
        $entry = date("Y/m/d H:i:s") .' - '.strtoupper((string) $type).' - '.$message;
        if ( ! is_null($file))
        {
            $entry .= " in $file";
        }
        if ( ! is_null($line))
        {
            $entry .= " on line $line";
        }
        $entry .= "\r\n";

        return file_put_contents($this->logfile, $entry, FILE_APPEND);
    }

    /**
     *
     * Return the logger instance, creating it on first use.
     *
     * @return logger
     * @access public
     */
    public static function getInstance()
    {
        if (!self::$instance)
        {
            self::$instance = new logger;
        }
        return self::$instance;
    }

    /**
     * Clone is set to private to stop cloning
     */
    private function __clone()
    {
    }

    /**
     * Custom error handler: logs the error and echoes the capture marker.
     *
     * @param int $errno
     * @param string $errmsg
     * @param string $errfile
     * @param int $errline
     */
    public function handleErrors($errno, $errmsg, $errfile, $errline)
    {
        $this->write('An error has occured '.$errno.' : '. $errmsg.'', 'error', $errfile, $errline);
        echo $this->capturedMarker();
    }

    /**
     * Custom handler for uncaught exceptions.
     *
     * PHP passes the exception object itself to this handler, so the entry
     * records the exception's own message, file and line rather than this
     * logger's location. A plain string is still accepted for direct callers.
     *
     * @param Exception|string $errmsg
     */
    public function handleExceptions($errmsg)
    {
        if (is_object($errmsg) && method_exists($errmsg, 'getMessage'))
        {
            $this->write(
                'An error has occured : '.$errmsg->getMessage(),
                'error',
                $errmsg->getFile(),
                $errmsg->getLine()
            );
        }
        else
        {
            $this->write('An error has occured : '. $errmsg.'', 'error', __FILE__, __LINE__);
        }
        echo $this->capturedMarker();
    }

    /**
     * Text echoed after an error or exception has been logged: the value of the
     * ERROR_CAPTURED constant when it is defined, otherwise the literal
     * constant name, which avoids a fatal "Undefined constant" error on PHP 8.
     *
     * @return mixed
     */
    private function capturedMarker()
    {
        return defined('ERROR_CAPTURED') ? constant('ERROR_CAPTURED') : 'ERROR_CAPTURED';
    }
}
// Fetch the shared logger singleton into $log. Note that no `logfile` is
// assigned anywhere in the visible code, and that this line sits below the
// scrape script that already calls $log->write().
$log = logger::getInstance();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment