Last active
January 9, 2018 19:42
-
-
Save davidmintz/bc0b88cdaff7fb9040ad6f18dfa0f2c0 to your computer and use it in GitHub Desktop.
scrapes official holidays from the Court's official website and inserts in our database
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * CLI script to scrape holidays from the SDNY website and store them in our database.
 * Normally you won't need this more than about once a year. And of course things will
 * break if the HTML of the court's holidays page changes.
 */
// Name of the machine we are running on; it is used further down to choose
// between the development and the production database.
$host = gethostname();
if ($host === false) {
    fwrite(STDERR, "oops, could not determine the hostname\n");
    exit(1);
}

// The scraped lines carry only month and day, so the current year is
// appended when the dates are parsed.
$year = date('Y');

// Download the holidays page, keep only the table rows that hold a holiday,
// then strip HTML tags, parenthesised remarks and leading whitespace.
// Note: the shell reports the exit status of the LAST command in the
// pipeline (perl), so a curl failure shows up as too few lines rather than
// as a non-zero status; the line-count check below covers that case.
$command = "curl -s http://nysd.uscourts.gov/holidays|egrep '<tr><td><strong>.+<small>'| perl -p -e \"s/<\/?[^>]+>|\(.+\)|^\s+//g\"";

// exec() appends to the output array, so start from a known-empty one.
$lines = [];
exec($command, $lines, $retval);

// exit() with a string argument prints it but terminates with status 0;
// write the message to STDERR and exit non-zero so callers can see the failure.
if ($retval !== 0) {
    fwrite(STDERR, "oops, non-zero exit status from command:\n $command\n");
    exit(1);
}

// Fewer than ten rows means the page layout changed or the download failed.
if (count($lines) < 10) {
    fwrite(STDERR, "downloaded data looks suspiciously short. please have a look and try a again\n");
    exit(1);
}
/*
 * The holiday labels in our database (id, then label) look like:
 *
 *    1  New Year's Day
 *    2  Martin Luther King Day
 *    3  Lincoln's Birthday
 *    4  President's Day
 *    5  Memorial Day
 *    6  Independence Day
 *    7  Labor Day
 *    8  Columbus Day
 *    9  Veterans' Day
 *   10  Thanksgiving
 *   11  Christmas
 *   12  Election Day
 */
/*
 * The web-scraped data, one "name - date" pair per line, should look like:
 *
 *   New Year's Day - January 1st
 *   Martin Luther King, Jr. Birthday - January 15th
 *   Washington's Birthday / President's Day - February 19th
 *   Memorial Day - May 28th
 *   Independence Day - July 4th
 *   Labor Day - September 3rd
 *   Columbus Day - October 8th
 *   Veteran's Day - November 12th
 *   Thanksgiving Day - November 22nd
 *   Friday November 23rd, 2018 - November 23rd
 *   Christmas Eve - December 24th
 *   Christmas Day - December 25th
 *   New Year's Eve - December 31st
 *
 * Several of these names do not match a database label exactly, so they are
 * matched by fragment or, failing that, chosen interactively.
 */
// Database credentials are read from the invoking user's MySQL client
// configuration file.
$config_file = getenv('HOME').'/.my.cnf';
$db_params = parse_ini_file($config_file);
if ($db_params === false) {
    fwrite(STDERR, "oops, could not read database credentials from $config_file\n");
    exit(1);
}
$db_user = isset($db_params['user']) ? $db_params['user'] : null;
$db_password = isset($db_params['password']) ? $db_params['password'] : null;

// Hosts whose name contains "interps" get the production database;
// every other machine writes to the development copy.
$database = strpos((string) $host, 'interps') === false ? 'dev_interpreters' : 'interpreters';

// Ask PDO to throw on SQL errors. Before PHP 8 the default error mode is
// silent, in which case a failed INSERT would go unnoticed by the
// try/catch around execute() further down.
$db = new PDO(
    "mysql:host=localhost;dbname=$database",
    $db_user,
    $db_password,
    [PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION]
);

// FETCH_KEY_PAIR turns the two-column result into an array keyed by the
// first column with the second column as value (here: id => label).
$holidays = $db->query('SELECT * FROM holidays', PDO::FETCH_KEY_PAIR)->fetchAll();

// Prepared once, executed once per scraped holiday.
$sql = 'INSERT INTO court_closings (date,holiday_id) VALUES (:date,:holiday_id)';
$statement = $db->prepare($sql);
// Fallback for scraped names that do not equal a database label verbatim:
// the first fragment found (case-insensitively) in the scraped name selects
// the label to look up. The order is significant and is checked top to bottom.
$label_aliases = [
    'Martin Luther King' => 'Martin Luther King Day',
    'President'          => 'President\'s Day',
    'Veteran'            => 'Veterans\' Day',
    'Thanksgiving'       => 'Thanksgiving',
    'Christmas'          => 'Christmas',
    'New Year'           => 'New Year\'s Day',
];

foreach ($lines as $line) {
    // Every line is expected to read "<holiday name> - <month> <day>".
    $parts = preg_split('/\s+-\s+/', trim($line));
    if ($parts === false || count($parts) < 2) {
        echo "WARNING: skipping line without a 'name - date' pair: '$line'\n";
        continue;
    }
    list($name, $date) = $parts;

    // Parse the date before anything else so the operator is never asked to
    // pick a holiday for a row that is going to be skipped anyway.
    $timestamp = strtotime("$date, $year");
    if (false === $timestamp) {
        echo "WARNING: could not parse date (for $name) from '$date'\n";
        continue;
    }
    $formatted_date = date('Y-m-d', $timestamp);

    // 1. exact label match
    $holiday_id = array_search($name, $holidays, true);

    // 2. match by well-known fragment
    if ($holiday_id === false) {
        foreach ($label_aliases as $fragment => $label) {
            if (stripos($name, $fragment) !== false) {
                $holiday_id = array_search($label, $holidays, true);
                break;
            }
        }
    }

    // 3. let the operator choose
    if ($holiday_id === false) {
        echo "\nnot found: $name\n";
        print_r($holidays);
        do {
            $answer = readline('Choose one of the above ids, or CTRL+C to bail: ');
            if ($answer === false) {
                // readline() returns false at end of input (e.g. CTRL+D or a
                // closed pipe); without this check the loop would spin forever.
                fwrite(STDERR, "\nno more input available, giving up\n");
                exit(1);
            }
            $answer = trim($answer);
            $is_valid = $answer !== '' && array_key_exists($answer, $holidays);
            if (! $is_valid) {
                echo "not a valid id, try again\n";
            }
        } while (! $is_valid);
        $holiday_id = $answer;
    }

    try {
        $inserted = $statement->execute([':date' => $formatted_date, ':holiday_id' => $holiday_id]);
    } catch (Exception $e) {
        printf("oops, caught exception %s: %s\n", get_class($e), $e->getMessage());
        echo "moving on...\n";
        continue;
    }

    // When PDO is not in exception mode a failed INSERT only shows up as a
    // false return value, so check it before reporting success.
    if ($inserted) {
        echo "added $holidays[$holiday_id] on $formatted_date\n";
    } else {
        $error = $statement->errorInfo();
        printf(
            "oops, could not insert %s on %s: %s\n",
            $name,
            $formatted_date,
            isset($error[2]) ? $error[2] : 'unknown database error'
        );
        echo "moving on...\n";
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment