Skip to content

Instantly share code, notes, and snippets.

@davidmintz
Last active January 9, 2018 19:42
Show Gist options
  • Save davidmintz/bc0b88cdaff7fb9040ad6f18dfa0f2c0 to your computer and use it in GitHub Desktop.
Save davidmintz/bc0b88cdaff7fb9040ad6f18dfa0f2c0 to your computer and use it in GitHub Desktop.
Scrapes official holidays from the Court's website and inserts them into our database.
<?php
/**
* CLI script to scrape holidays from the SDNY website and store them in our database.
* Normally you won't need this more than about once a year. And of course it will
* break if the HTML of the holidays page changes, because the parsing relies on its markup.
*/
// Find out which machine we are running on; the hostname is used further
// down to choose between the production and the development database.
exec('hostname',$hostname);
$host = $hostname[0];
// The scraped dates carry no year of their own, so the current year is
// appended when they are parsed.
$year = date('Y');
// Pipeline: download the holidays page quietly, keep only the table rows that
// match the holiday markup, then strip HTML tags, parenthesized remarks and
// leading whitespace so each line is plain text.
$command = "curl -s http://nysd.uscourts.gov/holidays|egrep '<tr><td><strong>.+<small>'| perl -p -e \"s/<\/?[^>]+>|\(.+\)|^\s+//g\"";
exec($command, $lines, $retval);
// exit() with a string argument prints it but terminates with status 0, which
// would make a failure look like success to the caller. Report on STDERR and
// leave with a non-zero status instead.
if ($retval !== 0) {
    fwrite(STDERR, "oops, non-zero exit status from command:\n $command\n");
    exit(1);
}
// A shell pipeline reports the status of its last command only, so a failed
// download can still get past the check above; the line count is the real
// safety net against an empty or truncated page.
if (count($lines) < 10) {
    fwrite(STDERR, "downloaded data looks suspiciously short. please have a look and try a again\n");
    exit(1);
}
/** the labels in our database look like:
1 New Year's Day
2 Martin Luther King Day
3 Lincoln's Birthday
4 President's Day
5 Memorial Day
6 Independence Day
7 Labor Day
8 Columbus Day
9 Veterans' Day
10 Thanksgiving
11 Christmas
12 Election Day
* ------------------ */
/** web-scraped data should look like:
New Year's Day - January 1st
Martin Luther King, Jr. Birthday - January 15th
Washington's Birthday / President's Day - February 19th
Memorial Day - May 28th
Independence Day - July 4th
Labor Day - September 3rd
Columbus Day - October 8th
Veteran's Day - November 12th
Thanksgiving Day - November 22nd
Friday November 23rd, 2018 - November 23rd
Christmas Eve - December 24th
Christmas Day - December 25th
New Year's Eve - December 31st
*/
// Credentials are read from the invoking user's MySQL option file rather than
// being hard-coded in this script.
$db_params = parse_ini_file(getenv('HOME').'/.my.cnf');
if ($db_params === false) {
    // Without this guard the failure would only surface later as a confusing
    // connection error with empty credentials.
    fwrite(STDERR, "could not read database credentials from " . getenv('HOME') . "/.my.cnf\n");
    exit(1);
}
// Hosts whose name contains 'interps' get the production database; every
// other machine works against the development copy.
$database = strstr($host,'interps') === false ? 'dev_interpreters' : 'interpreters';
// Ask PDO to throw on errors. Before PHP 8 the default mode is silent, in
// which case a failed query() returns false (fatal error on ->fetchAll()) and
// a failed execute() never reaches the catch block of the insert loop.
$db = new PDO(
    "mysql:host=localhost;dbname=$database",
    $db_params['user'],
    $db_params['password'],
    [PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION]
);
// FETCH_KEY_PAIR builds an array keyed by the first selected column with the
// second column as value -- presumably id => label; verify against the
// holidays table definition.
$holidays = $db->query('SELECT * FROM holidays',PDO::FETCH_KEY_PAIR)->fetchAll();
// Prepared once, executed once per scraped holiday.
$sql = 'INSERT INTO court_closings (date,holiday_id) VALUES (:date,:holiday_id)';
$statement = $db->prepare($sql);
// Scraped names that do not literally match a database label are mapped by a
// case-insensitive substring search. Order matters: the first matching
// fragment wins, exactly as in an if/elseif chain.
$aliases = [
    'Martin Luther King' => 'Martin Luther King Day',
    'President'          => 'President\'s Day',
    'Veteran'            => 'Veterans\' Day',
    'Thanksgiving'       => 'Thanksgiving',
    'Christmas'          => 'Christmas',
    'New Year'           => 'New Year\'s Day',
];
foreach ($lines as $line) {
    // Each line is expected to look like "<holiday name> - <month> <day>".
    $parts = preg_split('/\s+-\s+/', $line);
    if ($parts === false || count($parts) < 2) {
        // No separator found: there is no date to work with, so skip the line
        // instead of carrying on with an undefined $date.
        echo "WARNING: could not split '$line' into a holiday name and a date\n";
        continue;
    }
    list($name, $date) = $parts;

    // First try an exact match on the label, then fall back to the aliases.
    $holiday_id = array_search($name, $holidays, true);
    if ($holiday_id === false) {
        foreach ($aliases as $fragment => $label) {
            if (stripos($name, $fragment) !== false) {
                $holiday_id = array_search($label, $holidays, true);
                break;
            }
        }
        if (! $holiday_id) {
            // Still unknown: let the operator pick the holiday by hand.
            echo "\nnot found: $name\n";
            print_r($holidays);
            do {
                $answer = readline('Choose one of the above ids, or CTRL+C to bail: ');
                if ($answer === false) {
                    // readline() returns false on end of input (e.g. stdin
                    // closed or not a terminal); without this check the
                    // prompt would loop forever.
                    fwrite(STDERR, "\nno more input available, giving up\n");
                    exit(1);
                }
                $holiday_id = trim($answer);
                $valid = array_key_exists($holiday_id, $holidays);
                if (! $valid) {
                    echo "not a valid id, try again\n";
                }
            } while (! $valid);
        }
    }

    // The scraped date has no year, so the current one is appended.
    $timestamp = strtotime("$date, $year");
    if (false === $timestamp) {
        echo "WARNING: could not parse date (for $name) from '$date'\n";
        continue;
    }
    $formatted_date = date('Y-m-d',$timestamp);

    try {
        $inserted = $statement->execute([':date'=>$formatted_date,':holiday_id'=>$holiday_id]);
    } catch (Exception $e) {
        printf("oops, caught exception %s: %s\n",get_class($e),$e->getMessage());
        echo "moving on...\n";
        continue;
    }
    if ($inserted) {
        echo "added $holidays[$holiday_id] on $formatted_date\n";
    } else {
        // When PDO is not in exception mode a failed insert only shows up as
        // a false return value; report it rather than claiming success.
        $info = $statement->errorInfo();
        printf(
            "oops, insert failed for %s: %s\n",
            $formatted_date,
            isset($info[2]) ? $info[2] : 'unknown error'
        );
        echo "moving on...\n";
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment