Created
December 13, 2014 16:27
-
-
Save Asbra/84b32d4b31a9914c4cf2 to your computer and use it in GitHub Desktop.
PHP Craigslist job finder (scraper)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Craigslist job finder.
 *
 * Downloads one Craigslist section, looks for the configured keyword(s) in
 * the listing titles, and sends an email when at least one title matches.
 *
 * @author Johan <[email protected]>
 * @date 2014-11-10
 * @modified 2014-11-10
 */

// ---------------------------------------------------------------------------
// What to look for in the listing titles. Accepted forms:
//   a plain word ............ $keyword = 'programmer';
//   a regular expression .... $keyword = '/(programm?er|coder)/i';
//   an array of either ...... $keyword = array('programmer', 'coder');
// ---------------------------------------------------------------------------
$keyword = array('model', 'actress');

// City to search; this is the subdomain, e.g. "newyork" for newyork.craigslist.org.
$city = 'newyork';

// Section code appended to /search/ in the URL (jjj = jobs).
$section = 'jjj';

// Notification email: recipient address and subject line.
// NOTE(review): the recipient below looks like an obfuscated placeholder, not a
// deliverable address — set it to a real mailbox before running.
$email_to = '[email protected]';
$email_subject = 'CraigsList jobs found!';
///////////////////////////////////////////////////////////////////////////////
// Nothing below this line needs to be edited.

// The cURL class comes from the author's wrapper:
// http://asbra.net/php-curl-class-snippet-tutorial/
require 'curl.php';

// Fetch the search page for the configured city and section.
$curl = new cURL();
$url = sprintf('http://%s.craigslist.org/search/%s', $city, $section);
$curl->get($url);

// Pull out every anchor carrying class="hdrlnk":
// capture group 1 is the href, capture group 2 is the link text (the title).
$regex = '/<a[^>]*?href="([^"]+)"[^>]*?class="hdrlnk"[^>]*?>([^<]+)<\/a>/';
preg_match_all($regex, $curl->data, $matches);
printf("Found %d jobs total\r\n", count($matches[0]));

// From here on every PHP error is swallowed; the previous handler is put back
// by restore_error_handler() at the end of the script.
set_error_handler(function () {
    // intentionally empty: errors are ignored
});

// Number of listings whose title matched a keyword; incremented by search().
$found = 0;
/**
 * Search the scraped listing titles for a single keyword.
 *
 * Reads the global $matches array produced by preg_match_all() ($matches[1]
 * holds the hrefs, $matches[2] the titles) and increments the global $found
 * once for every matching listing. A title matches when it contains $keyword
 * case-insensitively, or, when $keyword compiles as a PCRE pattern, when that
 * pattern matches the title.
 *
 * Every matching listing is reported, not only the first one.
 *
 * @param string $keyword Plain word or delimited regular expression.
 * @return string|false One '<a href="...">title</a>' line (CRLF-terminated) per
 *                      matching listing, concatenated; false when nothing matched.
 */
function search($keyword)
{
    global $matches, $found;

    // An empty needle would match every title (or raise a warning on older
    // PHP versions), and without scraped data there is nothing to search.
    if (!is_scalar($keyword) || (string) $keyword === '') {
        return false;
    }
    $keyword = (string) $keyword;
    if (empty($matches[1]) || empty($matches[2])) {
        return false;
    }

    // Decide once whether the keyword is a usable regular expression. A plain
    // word such as "model" makes preg_match() emit a warning and return false,
    // so the probe runs under a temporary handler instead of warning per title.
    set_error_handler(function () {
        return true;
    });
    $isRegex = preg_match($keyword, '') !== false;
    restore_error_handler();

    $html = '';
    foreach ($matches[1] as $i => $link) {
        if (!isset($matches[2][$i])) {
            continue;
        }
        $title = $matches[2][$i];

        echo "Searching for keyword {$keyword} in '{$title}'\r\n";

        $hit = stripos($title, $keyword) !== false
            || ($isRegex && preg_match($keyword, $title) === 1);
        if (!$hit) {
            continue;
        }

        $found++;
        echo "{$title}\r\n";
        $html .= '<a href="'.$link.'">'.$title.'</a>'."\r\n";
    }

    return $html === '' ? false : $html;
}
// Run the search for every configured keyword. The (array) cast lets a single
// string and an array of keywords share one code path.
$html = '';
foreach ((array) $keyword as $kw) {
    // search() returns false when nothing matched, which concatenates as ''.
    $html .= search($kw);
}

if ($html !== '') {
    // The body consists of <a> tags, so it has to be declared as HTML or mail
    // clients display the raw markup.
    // NOTE(review): charset assumes the scraped page is UTF-8 — confirm.
    $headers = "MIME-Version: 1.0\r\nContent-Type: text/html; charset=UTF-8";

    // nl2br() keeps one link per line once the body is rendered as HTML.
    if (mail($email_to, $email_subject, nl2br($html), $headers)) {
        echo "Sent email\r\n{$html}\r\n";
    } else {
        // mail() returns false when the message was not accepted for delivery.
        echo "Failed to send email\r\n";
    }
}

// Undo the script-wide error silencing installed before the search.
restore_error_handler();

echo "Found {$found} matching jobs\r\n";
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment