Skip to content

Instantly share code, notes, and snippets.

@chrisns
Created June 13, 2012 10:19
Show Gist options
  • Save chrisns/2923257 to your computer and use it in GitHub Desktop.
Save chrisns/2923257 to your computer and use it in GitHub Desktop.
socialsecurity popularnames technical test
<?php
// queries http://www.socialsecurity.gov/cgi-bin/popularnames.cgi
// allows the user to enter a range of years to aggregate stats over and to limit the number of results; caches responses if possible
// Given the extra time
// * Functions would have doxygen documentation around them
// * All functions would assert clearly and bail early if they weren't given expected variables
// * Rationalise mixed use of camelCase and snake_case
// * More comments
// * More helper functions and abstract the page function into a separate one when there is a $_GET submission
// * Maybe written an abstraction of the apc read/writes so it could fall back to serialised file writes
// * Make the previously selected values display in the inputs
// * Use selects rather than inputs
// * Make the table sortable in php/javascript
// * Made it more obvious that if you don't specify a limit it will display all it can
// * Written some css to make it more readable
// * Made the table semantic
// * Allowed the user to retrieve the response in different formats
// * Used a pear class to help write and validate the form elements e.g http://pear.php.net/manual/en/package.html.html-quickform.tutorial.php
// In hindsight
// * I probably wouldn't have written the dom entirely in php as I spent far too long on it and the code isn't as readable as I wanted it to be
// * There might be a faster way to traverse the dom than xpath, I just went with what I knew I could do quickly
// * I could/should have written what processes the xpath into something safer than what it is
// * While I've limited exposure to Symfony I should have used that to do some of the leg work rather than reinventing the wheel in a few places
// * I presume the test was written a while ago, but since you specified the year as 2010 I made it restrict to that rather than the relative last year since 2011 data is available
/**
 * Scrapes the popular-names tables from the endpoint in self::$entrypoint,
 * sums the per-name counts over a range of years and renders the result
 * (plus a small query form) as an HTML page.
 *
 * Per-year results are cached in APCu (or legacy APC) for self::$apc_ttl
 * seconds. Fatal conditions terminate the script with die(), as the rest
 * of this file expects.
 */
class mytest {
    /** URL that is POSTed to, once per year that is not cached. */
    public static $entrypoint = "http://www.socialsecurity.gov/cgi-bin/popularnames.cgi";
    /** Prefix for every cache key written by this class. */
    public static $apc_prefix = "mytest_";
    /** Cache lifetime in seconds: 5 minutes. */
    public static $apc_ttl = 300;

    /** Earliest year accepted by page(). */
    const MIN_YEAR = 1880;
    /** Latest year accepted by page(). */
    const MAX_YEAR = 2010;

    /**
     * Checks that the required extensions are present and dies otherwise.
     * Either APCu or the legacy APC user cache satisfies the cache requirement.
     */
    public function __construct() {
        // check for some basics
        if (!function_exists("apcu_fetch") && !function_exists("apc_fetch")) {
            die("We need apc");
        }
        if (!function_exists("curl_init")) {
            die("We need curl");
        }
    }

    /**
     * Reads a value from the user cache, preferring APCu over legacy APC.
     *
     * @param string $key Cache key.
     * @return mixed The cached value, or false when the key is absent.
     */
    private function cache_fetch($key) {
        if (function_exists("apcu_fetch")) {
            return apcu_fetch($key);
        }
        return apc_fetch($key);
    }

    /**
     * Writes a value to the user cache with the class-wide TTL.
     *
     * @param string $key   Cache key.
     * @param mixed  $value Value to store.
     * @return bool Whatever the underlying store call reports.
     */
    private function cache_store($key, $value) {
        if (function_exists("apcu_store")) {
            return apcu_store($key, $value, self::$apc_ttl);
        }
        return apc_store($key, $value, self::$apc_ttl);
    }

    /**
     * Returns a $_GET parameter as an integer.
     *
     * @param string $name Parameter name.
     * @return int The integer value, or 0 when the parameter is missing or
     *             not a scalar (e.g. "limit[]=1").
     */
    private function int_param($name) {
        if (!isset($_GET[$name]) || !is_scalar($_GET[$name])) {
            return 0;
        }
        return (int) $_GET[$name];
    }

    /**
     * POSTs a "top 1000 names with counts" query for one year and returns
     * the raw response body. Dies if the request fails or the body is empty.
     *
     * @param int|string $year Year to query.
     * @return string Raw response body.
     */
    private function make_http_request($year) {
        $query = array(
            'year' => $year,
            'top' => 1000,
            'number' => "n",
        );
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, self::$entrypoint);
        curl_setopt($ch, CURLOPT_POST, TRUE);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
        curl_setopt($ch, CURLOPT_FAILONERROR, TRUE);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 0);
        curl_setopt($ch, CURLOPT_TIMEOUT, 180);
        curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($query));
        $return = curl_exec($ch);
        if (!$return) {
            // The status code must be read while the handle is still open.
            $http_error_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            curl_close($ch);
            die($http_error_code . ': Unable to connect to server');
        }
        curl_close($ch);
        return $return;
    }

    /**
     * Downloads one year's page and extracts name => count pairs from it.
     *
     * @param int|string $year Year to fetch.
     * @return array Map of name to integer count; empty when the expected
     *               table is not found in the response.
     */
    private function get_and_make_array_of_year($year) {
        $rawresponse = $this->make_http_request($year);
        // Collect libxml warnings internally so that sloppy remote markup
        // does not leak PHP warnings into the rendered page.
        $previous_setting = libxml_use_internal_errors(true);
        $doc = new DOMDocument();
        $loaded = $doc->loadHTML($rawresponse);
        libxml_clear_errors();
        libxml_use_internal_errors($previous_setting);
        $data = array();
        if (!$loaded) {
            return $data;
        }
        $xpath = new DOMXpath($doc); // create an xpath addressable object
        // Selects the 2nd and 3rd cell of every row after the first one.
        // NOTE(review): presumably these are the male name and its count
        // (the original todo talks about adding women) - confirm against the page.
        // @todo refactor this xpath with something that doesn't require the pairing logic beneath
        // @todo maybe allow you to select women too
        $elements = $xpath->query("//table[@summary='Popularity for top 1000']/tr[position() >= 2]/td[position() = 2 or position() = 3 ]");
        if ($elements === false) {
            return $data;
        }
        // Cells arrive alternating name, count, name, count, ...
        $key = null;
        foreach ($elements as $node) {
            if ($key === null) { // first cell of a pair: remember the name
                $key = $node->nodeValue;
                continue;
            }
            // todo insert some error handling if we don't get an integer back or they change their formatting
            // remove thousands separators and make sure it is stored as an int
            $data[$key] = (int) str_replace(',', '', trim($node->nodeValue));
            $key = null;
        }
        return $data;
    }

    /**
     * Returns the name => count map for one year, using the cache when it
     * holds a non-empty entry. Dies if $year is not numeric.
     *
     * @param int|string $year Year to fetch.
     * @return array Map of name to integer count.
     */
    private function fetch_year_data($year) {
        if (!is_numeric($year)) {
            die("year is not numeric");
        }
        $cache_key = self::$apc_prefix . $year;
        $data = $this->cache_fetch($cache_key);
        if ($data) {
            return $data; // if we have this year cached then just return it
        }
        $data = $this->get_and_make_array_of_year($year);
        $this->cache_store($cache_key, $data);
        return $data;
    }

    /**
     * Sums the counts of every name over an inclusive range of years.
     *
     * @param int $start_year First year of the range.
     * @param int $end_year   Last year of the range.
     * @param int $limit      Maximum number of names to return; 0 (or any
     *                        non-positive value) means no limit.
     * @return array Map of name to total count, sorted by total descending.
     */
    private function merge_year_data($start_year, $end_year, $limit) {
        $merged_data = array();
        for ($year = $start_year; $year <= $end_year; $year++) {
            foreach ($this->fetch_year_data($year) as $name => $count) {
                if (isset($merged_data[$name])) {
                    $merged_data[$name] += $count;
                }
                else {
                    $merged_data[$name] = $count;
                }
            }
        }
        arsort($merged_data);
        // Trim to the requested size; preserve keys so that purely numeric
        // names are not renumbered.
        if ($limit > 0) {
            $merged_data = array_slice($merged_data, 0, $limit, true);
        }
        return $merged_data;
    }

    /**
     * Renders the page: always the query form, and, when $_GET carries a
     * submission, a ranked table of the merged name totals.
     *
     * Reads "limit", "startyear" and "endyear" from $_GET. Dies with
     * "invalid date range" when the years fall outside MIN_YEAR..MAX_YEAR
     * or the start year is after the end year.
     *
     * @return string The serialised HTML document.
     */
    public function page() {
        // DOMImplementation's factory methods are instance methods; calling
        // them statically is a fatal error on PHP 8.
        $implementation = new DOMImplementation();
        $doctype = $implementation->createDocumentType("html", "-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd");
        $document = $implementation->createDocument(null, 'html', $doctype);
        $document->formatOutput = true;

        $html = $document->documentElement;
        $head = $document->createElement('head');
        $title = $document->createElement('title');
        $title->appendChild($document->createTextNode('CNS Test'));
        $head->appendChild($title);
        $body = $document->createElement('body');
        $html->appendChild($head);
        $html->appendChild($body);

        $form = $document->createElement('form');
        $form_inputs = array(
            "limit" => "Limit",
            "startyear" => "Start Year",
            "endyear" => "End Year",
        );
        foreach ($form_inputs as $name => $label_text) {
            // Each element gets its own attributes: a single attribute node
            // can only belong to one element at a time.
            $label = $document->createElement('label');
            $label->appendChild($document->createTextNode($label_text));
            $label->setAttribute('for', $name);
            $form->appendChild($label);
            $input = $document->createElement('input');
            $input->setAttribute('name', $name);
            $input->setAttribute('id', $name); // target of the label's "for"
            $form->appendChild($input);
        }
        $submit_button = $document->createElement('input');
        $submit_button->setAttribute('type', "submit");
        $submit_button->setAttribute('value', "submit");
        $form->appendChild($submit_button);
        $body->appendChild($form);

        // No submission: the form alone is the page.
        if (empty($_GET)) {
            return $document->saveHTML();
        }

        $limit = max(0, $this->int_param('limit'));
        $startyear = $this->int_param('startyear');
        $endyear = $this->int_param('endyear');
        if ($startyear < self::MIN_YEAR || $endyear < self::MIN_YEAR
            || $startyear > self::MAX_YEAR || $endyear > self::MAX_YEAR
            || $startyear > $endyear) {
            die("invalid date range");
        }

        $table = $document->createElement("table");
        $table_head = $document->createElement("thead");
        $table_head_row = $document->createElement("tr");
        foreach (array("Rank", "Name", "Total") as $heading) {
            $table_head_th = $document->createElement("th");
            $table_head_th->appendChild($document->createTextNode($heading));
            $table_head_row->appendChild($table_head_th);
        }
        $table_head->appendChild($table_head_row);
        $table->appendChild($table_head);
        $table_body = $document->createElement("tbody");
        $table->appendChild($table_body);

        $data = $this->merge_year_data($startyear, $endyear, $limit);
        $rank = 1;
        foreach ($data as $name => $total) {
            $tr = $document->createElement('tr');
            // Text nodes escape the scraped values; the value argument of
            // createElement() does not escape "&".
            foreach (array($rank, $name, number_format($total)) as $cell) {
                $td = $document->createElement('td');
                $td->appendChild($document->createTextNode((string) $cell));
                $tr->appendChild($td);
            }
            $table_body->appendChild($tr);
            $rank++;
        }
        $body->appendChild($table);
        return $document->saveHTML();
    }
}
// Bootstrap: build the controller and send the rendered page to the client.
$page_controller = new mytest();
echo $page_controller->page();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment