Created
June 13, 2012 10:19
-
-
Save chrisns/2923257 to your computer and use it in GitHub Desktop.
socialsecurity popularnames technical test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// queries http://www.socialsecurity.gov/cgi-bin/popularnames.cgi | |
// allows user to enter years to aggregate stats over, allows user to limit the number of results, caches responses if possible | |
// Given the extra time | |
// * Functions would have doxygen documentation around them | |
// * All functions would assert clearly and bail early if they weren't given expected variables | |
// * Rationalise mixed use of camelCase and snake_case | |
// * More comments | |
// * More helper functions and abstract the page function into a separate one when there is a $_GET submission | |
// * Maybe written an abstraction of the apc read/writes so it could fall back to serialised file writes | |
// * Make the previously selected values display in the inputs | |
// * Use selects rather than inputs | |
// * Make the table sortable in php/javascript | |
// * Made it more obvious that if you don't specify a limit it will display all it can | |
// * Written some css to make it more readable | |
// * Made the table semantic | |
// * Allowed the user to retrieve the response in different formats | |
// * Used a pear class to help write and validate the form elements e.g http://pear.php.net/manual/en/package.html.html-quickform.tutorial.php | |
// In hindsight | |
// * I probably wouldn't have written the dom entirely in php as I spent far too long on it and the code isn't as readable as I wanted it to be | |
// * There might be a faster way to traverse the dom than xpath, I just went with what I knew I could do quickly | |
// * I could/should have written what processes the xpath into something safer than what it is | |
// * While I've limited exposure to Symfony I should have used that to do some of the leg work rather than reinventing the wheel in a few places | |
// * I presume the test was written a while ago, but since you specified the year as 2010 I made it restrict to that rather than the relative last year since 2011 data is available | |
/**
 * Aggregates the popular-names statistics published by the US Social Security
 * Administration over a range of years and renders the result as an HTML page.
 *
 * Each year is requested from the popularnames.cgi endpoint (top 1000 names,
 * with counts), scraped out of the returned HTML table and cached in APC.
 * page() reads the "limit", "startyear" and "endyear" query parameters from
 * $_GET and returns the finished markup.
 *
 * Unrecoverable problems (missing extensions, failed request, invalid user
 * input) terminate the script through die() with a short message.
 */
class mytest {
    /** URL of the CGI script that is queried once per year. */
    public static $entrypoint = "http://www.socialsecurity.gov/cgi-bin/popularnames.cgi";

    /** Prefix of the APC keys under which per-year results are stored. */
    public static $apc_prefix = "mytest_";

    /** Cache lifetime in seconds: cache responses for 5 minutes. */
    public static $apc_ttl = 300;

    /** First year that may be requested. */
    public static $min_year = 1880;

    /** Last year that may be requested. */
    public static $max_year = 2010;

    /**
     * Verifies that the PHP extensions this class depends on are loaded.
     */
    public function __construct() {
        // check for some basics
        if (!function_exists("apc_fetch")) {
            die("We need apc");
        }
        if (!function_exists("curl_init")) {
            die("We need curl");
        }
        if (!class_exists("DOMDocument")) {
            die("We need dom");
        }
    }

    /**
     * POSTs a "top 1000 names with counts" query for one year to the endpoint.
     *
     * @param int|string $year Year to request.
     * @return string Raw response body.
     */
    private function make_http_request($year) {
        $query = array(
            'year' => $year,
            'top' => 1000,
            'number' => "n",
        );

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, self::$entrypoint);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_FAILONERROR, true);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
        curl_setopt($ch, CURLOPT_TIMEOUT, 180);
        curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($query));
        $response = curl_exec($ch);

        if (!$response) {
            // The status code has to be read while the handle is still open.
            $http_error_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            curl_close($ch);
            die($http_error_code . ': Unable to connect to server');
        }

        curl_close($ch);
        return $response;
    }

    /**
     * Downloads one year and scrapes it into a name => count map.
     *
     * The second and third cell of every data row of the
     * "Popularity for top 1000" table are used as name and count. Rows that
     * do not have both cells, or whose count is not a plain number once the
     * thousands separators are removed, are skipped.
     *
     * @todo maybe allow you to select women too
     *
     * @param int|string $year Year to download.
     * @return array Map of name (string) to count (int).
     */
    private function get_and_make_array_of_year($year) {
        $rawresponse = $this->make_http_request($year);

        $doc = new DOMDocument();
        // Tag-soup HTML makes libxml raise a warning per problem; collect them
        // internally instead of letting them leak into the page output.
        $previous_setting = libxml_use_internal_errors(true);
        $loaded = $doc->loadHTML($rawresponse);
        libxml_clear_errors();
        libxml_use_internal_errors($previous_setting);
        if (!$loaded) {
            die("Unable to parse the response for " . $year);
        }

        $xpath = new DOMXpath($doc); // create an xpath addressable object
        // position() >= 2 skips the header row of the table
        $rows = $xpath->query("//table[@summary='Popularity for top 1000']/tr[position() >= 2]");

        $data = array();
        if (!$rows) {
            return $data;
        }
        foreach ($rows as $row) {
            // Working row by row keeps name and count paired even when a row
            // (e.g. a footnote spanning all columns) has fewer cells.
            $cells = $xpath->query("td", $row);
            if (!$cells || $cells->length < 3) {
                continue;
            }
            $name = trim($cells->item(1)->nodeValue);
            // remove commas so the count can be stored as an int
            $count = str_replace(',', '', trim($cells->item(2)->nodeValue));
            if ($name === '' || !ctype_digit($count)) {
                continue;
            }
            $data[$name] = (int) $count;
        }
        return $data;
    }

    /**
     * Returns the name => count map of one year, using the APC cache.
     *
     * @param int|string $year Year to fetch; must be numeric.
     * @return array Map of name (string) to count (int).
     */
    private function fetch_year_data($year) {
        if (!is_numeric($year)) {
            die("year is not numeric");
        }

        $cache_key = self::$apc_prefix . $year;
        $data = apc_fetch($cache_key);
        if ($data) {
            return $data; // if we have this year cached then just return it
        }

        $data = $this->get_and_make_array_of_year($year);
        // An empty result is treated as a cache miss on read anyway, so do
        // not bother storing it.
        if ($data) {
            apc_store($cache_key, $data, self::$apc_ttl);
        }
        return $data;
    }

    /**
     * Sums the per-name counts of every year in [$start_year, $end_year].
     *
     * @param int $start_year First year (inclusive).
     * @param int $end_year Last year (inclusive).
     * @param int $limit Maximum number of names to return; values below 1
     *   mean "no limit".
     * @return array Map of name to total count, highest total first.
     */
    private function merge_year_data($start_year, $end_year, $limit) {
        $merged_data = array();
        for ($year = (int) $start_year; $year <= (int) $end_year; $year++) {
            foreach ($this->fetch_year_data($year) as $name => $count) {
                if (isset($merged_data[$name])) {
                    // we've already seen this name in an earlier year
                    $merged_data[$name] += $count;
                }
                else {
                    $merged_data[$name] = $count;
                }
            }
        }
        arsort($merged_data);

        // now we've got the full dataset, cut it down if a limit has been set
        $limit = (int) $limit;
        if ($limit > 0) {
            // preserve_keys keeps the names as keys even if one looks numeric
            $merged_data = array_slice($merged_data, 0, $limit, true);
        }
        return $merged_data;
    }

    /**
     * Reads one query parameter from $_GET as an integer.
     *
     * @param string $name Parameter name.
     * @return int The integer value, or 0 when missing or not a scalar.
     */
    private function int_param($name) {
        if (!isset($_GET[$name]) || !is_scalar($_GET[$name])) {
            return 0;
        }
        return (int) $_GET[$name];
    }

    /**
     * Creates an element holding a single text node and appends it to $parent.
     *
     * A text node is used rather than the value argument of createElement()
     * because that argument is not escaped, so an "&" in scraped text would
     * produce broken markup.
     *
     * @param DOMDocument $document Owner document.
     * @param DOMNode $parent Node that receives the new element.
     * @param string $tag Element name.
     * @param string $text Text content.
     * @return DOMElement The element that was appended.
     */
    private function append_text_element($document, $parent, $tag, $text) {
        $element = $document->createElement($tag);
        $element->appendChild($document->createTextNode((string) $text));
        $parent->appendChild($element);
        return $element;
    }

    /**
     * Builds the query form: limit, start year, end year and a submit button.
     *
     * @param DOMDocument $document Owner document.
     * @return DOMElement The form element (not yet attached to the document).
     */
    private function build_form($document) {
        $form = $document->createElement('form');
        $form_inputs = array(
            "limit" => "Limit",
            "startyear" => "Start Year",
            "endyear" => "End Year",
        );
        foreach ($form_inputs as $name => $label_text) {
            // Every label gets its own "for" attribute; a single attribute
            // node can only belong to one element at a time.
            $label = $this->append_text_element($document, $form, 'label', $label_text);
            $label->setAttribute('for', $name);

            $input = $document->createElement('input');
            $input->setAttribute('name', $name);
            $input->setAttribute('id', $name); // target of the label's "for"
            $form->appendChild($input);
        }

        $submit_button = $document->createElement('input');
        $submit_button->setAttribute('type', "submit");
        $submit_button->setAttribute('value', "submit");
        $form->appendChild($submit_button);
        return $form;
    }

    /**
     * Builds the Rank / Name / Total table.
     *
     * @param DOMDocument $document Owner document.
     * @param array $data Map of name to total, already in display order.
     * @return DOMElement The table element (not yet attached to the document).
     */
    private function build_table($document, $data) {
        $table = $document->createElement("table");

        $table_head = $document->createElement("thead");
        $head_row = $document->createElement("tr");
        foreach (array("Rank", "Name", "Total") as $text) {
            $this->append_text_element($document, $head_row, "th", $text);
        }
        $table_head->appendChild($head_row);
        $table->appendChild($table_head);

        $table_body = $document->createElement("tbody");
        $rank = 1;
        foreach ($data as $name => $total) {
            $tr = $document->createElement('tr');
            $this->append_text_element($document, $tr, 'td', $rank);
            $this->append_text_element($document, $tr, 'td', $name);
            $this->append_text_element($document, $tr, 'td', number_format($total));
            $table_body->appendChild($tr);
            $rank++;
        }
        $table->appendChild($table_body);
        return $table;
    }

    /**
     * Renders the page.
     *
     * Without any query parameters only the form is returned. Otherwise the
     * "startyear", "endyear" and optional "limit" parameters are validated
     * and the aggregated table is appended below the form.
     *
     * @return string The HTML document.
     */
    public function page() {
        // createDocumentType()/createDocument() are instance methods.
        $implementation = new DOMImplementation();
        $doctype = $implementation->createDocumentType("html", "-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd");
        $document = $implementation->createDocument(null, 'html', $doctype);
        $document->formatOutput = true;

        $html = $document->documentElement;
        $head = $document->createElement('head');
        $this->append_text_element($document, $head, 'title', 'CNS Test');
        $body = $document->createElement('body');
        $html->appendChild($head);
        $html->appendChild($body);

        $body->appendChild($this->build_form($document));

        if (empty($_GET)) {
            return $document->saveHTML();
        }

        $limit = $this->int_param('limit');
        $startyear = $this->int_param('startyear');
        $endyear = $this->int_param('endyear');

        // start <= end together with these two bounds keeps both years
        // inside [min_year, max_year]
        if ($startyear < self::$min_year || $endyear > self::$max_year || $startyear > $endyear) {
            die("invalid date range");
        }
        if ($limit < 0) {
            die("invalid limit");
        }

        $data = $this->merge_year_data($startyear, $endyear, $limit);
        $body->appendChild($this->build_table($document, $data));
        return $document->saveHTML();
    }
}
// Script entry point: build the page for the current request and emit it.
$page_controller = new mytest();
echo $page_controller->page();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment