Created
June 4, 2015 12:58
-
-
Save ruthtillman/dc0781e34cb9fea483d7 to your computer and use it in GitHub Desktop.
DPLA API search
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/*
YES) allow the user to enter her own query terms into a form
YES) only connect to the API when a user has entered a query into the form
YES) validate and encode the user's input to prevent cross-site scripting and other errors
YES) check that the HTTP request to the API did not result in an error
YES) display an error message if no results are returned from the API
YES) use at least one of your own functions in your PHP script
YES) document your function using PHP comments
Purpose of the form: Allow people to search the DPLA for photographs. They can enter keywords separated by comma-space. Handle paging.
File: dpla-key.php includes my API key as $dpla_key.
*/
include_once('dpla-key.php');

/*
Set Globals
The `global` statements are no-ops when this file runs as the top-level script;
they only matter if the file is ever included from inside a function.
*/
// Regex pattern for years from the 18th century to this century.
global $yearMatchPattern;
$yearMatchPattern = '/(17|18|19|20)\d{2}/';
// Current year, so the upper bound keeps working in the future.
global $thisYear;
$thisYear = intval(date('Y'));
// Error messages (false = no error) and "a valid date was supplied" flags.
// These are written by sanitizeStartDate() / sanitizeEndDate().
global $not_start_date;
$not_start_date = false;
global $not_end_date;
$not_end_date = false;
global $startTrue;
$startTrue = false;
global $endTrue;
$endTrue = false;

/*
Set Form Variables Using the Functions
Set as global so that they can be reused later in the hidden inputs of the pagination form.
*/
global $keywords;
// is_string() guards against array-style parameters (keywords[]=x), which
// would make htmlspecialchars() throw a TypeError on PHP 8.
if (isset($_REQUEST['keywords']) && is_string($_REQUEST['keywords'])) {
    $keywords = arrayKeywords(htmlspecialchars($_REQUEST['keywords']));
}
else {
    $keywords = '';
}

global $startDate;
if (isset($_GET['startDate'])) {
    $startDate = sanitizeStartDate($_GET['startDate']);
}
else {
    $startDate = '';
}

global $endDate;
if (isset($_GET['endDate'])) {
    $endDate = sanitizeEndDate($_GET['endDate']);
}
else {
    $endDate = '';
}

global $pageCount;
global $pageCountError;
// Page numbers must be 1-5 digits and at least 1. Anything else (text, "0",
// "abc1", an array parameter) falls back to page 1 with an explanatory message.
// The pattern is anchored so that stray digits inside other text do not count
// as a page number.
$pageCountError = '';
$pageCount = 1;
if (isset($_GET['pageCount'])) {
    $requestedPage = $_GET['pageCount'];
    if (is_string($requestedPage) && preg_match('/^\d{1,5}$/', $requestedPage) === 1 && intval($requestedPage) >= 1) {
        $pageCount = intval($requestedPage);
    }
    else {
        $pageCountError = '<p class="red">Sorry, you entered an invalid page number. Please use the drop-down at the bottom of this page to generate page numbers for requests with more than 10 results.</p>';
    }
}

/*
Use Form Variables to Set Further Test Functions
*/
// A submitted form with no usable keywords gets an inline message next to the field.
if (strlen($keywords) === 0 && isset($_GET['submit'])) {
    $no_input = 'Please enter input keywords to complete the search.';
}
else {
    $no_input = false;
}
// Error message when both dates are valid but in the wrong order, otherwise false.
$greaterThan = testDates($startDate,$endDate,$startTrue,$endTrue);
?>
<!DOCTYPE html>
<html lang="en-US">
<head>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
    <title>Form to return items from the DPLA</title>
    <link href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css" rel="stylesheet"/>
    <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script>
    <link rel="stylesheet" type="text/css" href="week-4-style.css" />
</head>
<body>
<div class="container">
    <div id="main" class="col-md-8 col-sm-12 col-xs-12">
<?php
echo $pageCountError;
if (isset($_GET['submit']) && strlen($keywords) === 0 || ($startTrue == true && $endTrue == true && $greaterThan != false)) {
    // Submitted without keywords, or both dates are valid but in the wrong order: do not query the API.
    echo '<p class="red main-error">Your request cannot be processed. Please see the form and correct your error.</p>';
}
elseif (strlen($keywords) > 0 && $not_end_date === false && $not_start_date === false) {
    // Keywords are present and neither date raised an error flag: build the API URL and render the results.
    $url = fetchDPLA($keywords, $startDate, $endDate, $dpla_key, $pageCount);
    echo handleDPLA($url);
}
elseif (isset($_GET['submit'])) {
    // Remaining submitted cases (an invalid start or end date). Only shown after a submit so it does not appear on first load.
    echo '<p class="red main-error">Your request cannot be processed. Please see the form and correct your error.</p>';
}
elseif (!isset($_GET['submit'])) {
    echo '<p class="waiting">Waiting for you to search...</p>';
}
?>
    </div> <!-- Main -->
    <div id="sidebar" class="col-md-4 col-sm-12 col-xs-12">
        <h1>Search the DPLA!</h1>
        <p>Use the form below to enter keywords for the images. Separate multiple keywords with a <em>comma</em>. Multiple keywords will be handled with a boolean AND. You can also specify start and end dates to limit your search.</p>
        <form method="GET" action="week-4.php">
            <label for="keywords">Keywords (required):</label>
            <?php /* Redisplay the raw submitted keywords; guarded so the first page load (no keywords parameter) does not raise an undefined-index warning. */ ?>
            <input type="text" id="keywords" name="keywords" value="<?php echo (isset($_REQUEST['keywords']) && is_string($_REQUEST['keywords'])) ? htmlspecialchars($_REQUEST['keywords'], ENT_QUOTES, 'UTF-8') : ''; ?>" /> <span class="red"><?php echo $no_input; ?></span><br />
            <label for="startDate">Start Date:</label> (enter 4-digit year) <input type="text" id="startDate" name="startDate" value="<?php echo $startDate; ?>" /> <span class="red"><?php echo $not_start_date;?></span><br />
            <label for="endDate">End Date:</label> (enter 4-digit year) <input type="text" id="endDate" name="endDate" value="<?php echo $endDate; ?>" /> <span class="red"><?php echo $not_end_date;?> <?php echo $greaterThan; ?></span><br />
            <input type="submit" name="submit" value="submit">
        </form>
    </div><!-- sidebar-->
</div><!-- container-->
</body>
</html>
| <?php | |
| // Setting up my functions down here+ | |
| // Sanitize Start Date | |
/**
 * Validate the "start date" form field as a four-digit year.
 *
 * Side effects on globals:
 *  - $startTrue is set to true when the value is accepted.
 *  - $not_start_date is set to an error message when a value was supplied but rejected.
 *
 * @param mixed $date Raw value from the request.
 * @return int|string The year as an integer when accepted, '' when nothing was
 *                    entered, or intval() of the rejected input (so the form can
 *                    redisplay something harmless).
 */
function sanitizeStartDate($date) {
    global $not_start_date;
    global $thisYear;
    global $startTrue;

    // Nothing entered. Loose comparison on purpose: '', null and "0" all count as empty.
    if ($date == false) {
        return '';
    }

    // The pattern is anchored so the WHOLE value must be a year in 1700-2099.
    // An unanchored match would accept inputs such as "-1800" or "0.1700",
    // which are numeric and merely contain a year-like run of digits.
    $candidate = is_scalar($date) ? trim((string) $date) : '';
    if (preg_match('/^(17|18|19|20)\d{2}$/', $candidate) === 1 && intval($candidate) <= $thisYear) {
        $startTrue = true;
        return intval($candidate);
    }

    // Data was supplied but it is not a year between 1700 and the current year.
    $not_start_date = 'This is not a valid year. Please enter any year between 1700 and ' . $thisYear;
    return intval($date);
}
/**
 * Validate the "end date" form field as a four-digit year.
 *
 * Side effects on globals:
 *  - $endTrue is set to true when the value is accepted.
 *  - $not_end_date is set to an error message when a value was supplied but rejected.
 *
 * @param mixed $date Raw value from the request.
 * @return int|string The year as an integer when accepted, '' when nothing was
 *                    entered, or intval() of the rejected input (so the form can
 *                    redisplay something harmless).
 */
function sanitizeEndDate($date) {
    global $not_end_date;
    global $thisYear;
    global $endTrue;

    // Nothing entered. Loose comparison on purpose: '', null and "0" all count as empty.
    if ($date == false) {
        return '';
    }

    // The pattern is anchored so the WHOLE value must be a year in 1700-2099.
    // An unanchored match would accept inputs such as "-1800" or "0.1700",
    // which are numeric and merely contain a year-like run of digits.
    $candidate = is_scalar($date) ? trim((string) $date) : '';
    if (preg_match('/^(17|18|19|20)\d{2}$/', $candidate) === 1 && intval($candidate) <= $thisYear) {
        $endTrue = true;
        return intval($candidate);
    }

    // Data was supplied but it is not a year between 1700 and the current year.
    $not_end_date = 'This is not a valid year. Please enter any year between 1700 and ' . $thisYear;
    return intval($date);
}
/**
 * Turn the comma-separated keywords field into a DPLA query string.
 *
 * Each comma-separated term is trimmed and empty terms are dropped. When more
 * than one term remains, multi-word terms become quoted phrases with "+" in
 * place of spaces, and the terms are joined with "+AND+", e.g.
 * `hot air, balloon` becomes `"hot+air"+AND+balloon`.
 * A single remaining term is returned trimmed but otherwise untouched.
 *
 * Side effect: sets the global $keywordsTrue to true whenever non-empty input
 * was supplied.
 *
 * @param mixed $input Keywords text from the request.
 * @return string The query string, or '' when there is nothing to search for.
 */
function arrayKeywords($input) {
    global $keywordsTrue;

    // Loose comparison on purpose: '', null and "0" are all treated as "no input".
    if ($input == false) {
        return '';
    }
    $keywordsTrue = true;

    // Collect the non-empty terms. Blank entries ("a,,b", trailing commas)
    // would otherwise produce dangling "+AND+" operators in the query.
    $terms = array();
    foreach (explode(',', $input) as $term) {
        $term = trim($term);
        if ($term !== '') {
            $terms[] = $term;
        }
    }

    if (count($terms) === 0) {
        return '';
    }
    // A lone term is passed through as a simple value, not as a quoted phrase.
    if (count($terms) === 1) {
        return $terms[0];
    }

    // Several terms: quote the multi-word ones and AND everything together.
    foreach ($terms as $index => $term) {
        if (strpos($term, ' ') !== false) {
            $terms[$index] = '"' . str_replace(' ', '+', $term) . '"';
        }
    }
    return implode('+AND+', $terms);
}
/**
 * Report whether a start/end year pair is in the wrong chronological order.
 *
 * The order is only judged when both dates have been flagged as valid.
 *
 * @param mixed $startDate Start year.
 * @param mixed $endDate   End year.
 * @param mixed $startTrue Truthy when the start date is valid.
 * @param mixed $endTrue   Truthy when the end date is valid.
 * @return string|false An error message when the start is later than the end,
 *                      otherwise false.
 */
function testDates($startDate,$endDate,$startTrue,$endTrue) {
    $startsAfterEnd = $startDate > $endDate;
    $bothDatesValid = ($startTrue == true) && ($endTrue == true);

    if ($startsAfterEnd && $bothDatesValid) {
        return 'End date must not be earlier than the start date.';
    }
    return false;
}
/**
 * Build the DPLA items-search URL for a query.
 *
 * Reads the globals $startTrue, $endTrue and $greaterThan to decide which date
 * filters may be sent: a date filter is only included when that date was
 * validated and the date-order check passed ($greaterThan === false). Empty
 * filters are left out entirely because the API rejects empty fields.
 *
 * @param string     $input Query text as produced by arrayKeywords(): HTML-escaped,
 *                          with "+" standing in for spaces (e.g. `cats+AND+dogs`).
 * @param int|string $start Start year, sent as sourceResource.date.after.
 * @param int|string $end   End year, sent as sourceResource.date.before.
 * @param string     $key   DPLA API key.
 * @param int        $page  1-based results page.
 * @return string The complete request URL.
 */
function fetchDPLA($input, $start, $end, $key, $page) {
    global $startTrue;
    global $endTrue;
    global $greaterThan;

    // NOTE(review): the key travels over plain HTTP here; consider the https
    // endpoint if the deployment's PHP has the openssl stream wrapper enabled.
    $url = 'http://api.dp.la/v2/items?';

    // The caller HTML-escapes the keywords before building the query, so undo
    // that first: the API should search for "AT&T", not "AT&amp;T".
    // Then turn the "+" placeholders back into real spaces. http_build_query()
    // encodes spaces as "+", whereas a literal "+" would be sent as "%2B" and
    // the API would never see the boolean AND operator.
    // Caveat: a literal "+" typed by the user is treated as a space as well.
    $title = str_replace('+', ' ', htmlspecialchars_decode($input));

    $datesInOrder = ($greaterThan === false);

    $params = array('sourceResource.title' => $title);
    if ($datesInOrder && $startTrue == true) {
        $params['sourceResource.date.after'] = $start;
    }
    if ($datesInOrder && $endTrue == true) {
        $params['sourceResource.date.before'] = $end;
    }
    $params['page_size'] = '10';
    $params['page'] = $page;
    $params['api_key'] = $key;

    // Explicit "&" so the URL does not depend on the arg_separator.output ini setting.
    return $url . http_build_query($params, '', '&');
}
/**
 * Fetch a DPLA search URL and render the outcome as an HTML fragment.
 *
 * Reads the globals $pageCount, $keywords, $startDate and $endDate, and writes
 * the global $pages (total page count) when there are more than 10 results.
 *
 * Outcomes:
 *  - the request failed or did not end in HTTP 200: request-error message
 *  - the body is not a JSON object with a usable count: "Unknown Error" message
 *  - zero results: "0 results" message
 *  - otherwise: result list, plus a page-selection form when count > 10
 *
 * Every value taken from the API response is HTML-escaped before output.
 *
 * @param string $url Request URL, e.g. as built by fetchDPLA().
 * @return string HTML fragment.
 */
function handleDPLA($url) {
    global $pageCount;
    global $keywords;
    global $startDate;
    global $endDate;
    global $pages;

    $body = file_get_contents($url);
    // PHP's HTTP stream wrapper creates $http_response_header in this scope.
    // It does not exist when no HTTP exchange took place.
    $headers = isset($http_response_header) ? $http_response_header : array();

    if ($body === false || dplaHttpStatus($headers) !== 200) {
        $html = '<h1>There was something wrong with the request we received from DPLA</h1>';
        $html .= '<p>DPLA either didn\'t receive the data correctly or had a problem sending the response back to us.</p>';
        return $html;
    }

    $response = json_decode($body);
    if (!is_object($response) || !isset($response->count) || !is_numeric($response->count) || $response->count < 0) {
        // HTTP succeeded but the payload is not what the API normally returns.
        $html = '<h1>Unknown Error</h1>';
        $html .= '<p>An unknown error has occurred.</p>';
        $html .= '<p>In real life this is where you\'d put 404 contact info or whatever.</p>';
        return $html;
    }

    $count = (int) $response->count;
    if ($count === 0) {
        $html = '<h1>Your request had 0 results</h1>';
        $html .= '<p>Sorry, the DPLA didn\'t have any results for that request. If you had date restrictions, try loosening them up* or try another search.</p>';
        $html .= '<p>*The date field search options in the DPLA API don\'t handle "circa" situations at this point. If the date is "circa," the fields for end & begin are null.</p>';
        return $html;
    }

    $results = (isset($response->docs) && is_array($response->docs)) ? $response->docs : array();
    $hasMorePages = $count > 10;
    if ($hasMorePages) {
        // 10 results per page, rounded up.
        $pages = (int) ceil($count / 10);
    }

    $html = '<h1>Your search returned ' . $count . ($count === 1 ? ' result:</h1>' : ' results:</h1>');
    if ($hasMorePages) {
        $html .= '<p>Showing page ' . dplaEscape($pageCount) . ' of ' . $pages . '.</p>';
    }
    foreach ($results as $result) {
        $html .= dplaRenderResult($result);
    }
    if (!$hasMorePages) {
        return $html;
    }

    // Page selection. A drop-down is used because result sets can span many
    // pages; the current search travels along in hidden inputs.
    // The RAW submitted keywords are re-sent (not the processed $keywords) so
    // that the next request goes through exactly the same keyword processing
    // and produces the same query. $keywords is only a fallback.
    $rawKeywords = (isset($_REQUEST['keywords']) && is_string($_REQUEST['keywords'])) ? $_REQUEST['keywords'] : $keywords;

    $html .= '<div class="clear more-pages">';
    // With method GET the browser replaces any query string in the action, so none is given.
    $html .= '<form method="GET" action="week-4.php"><p>';
    $html .= '<input type="hidden" name="keywords" value="' . dplaEscape($rawKeywords) . '"/>';
    $html .= '<input type="hidden" name="startDate" value="' . dplaEscape($startDate) . '"/>';
    $html .= '<input type="hidden" name="endDate" value="' . dplaEscape($endDate) . '"/>';
    $html .= '<label for="listPages">View page:</label> ';
    $html .= '<select id="listPages" name="pageCount">';
    for ($page = 1; $page <= $pages; $page++) {
        // Preselect the page being shown so the drop-down reflects where the user is.
        $selected = ($page == $pageCount) ? ' selected="selected"' : '';
        $html .= '<option value="' . $page . '"' . $selected . '>' . $page . '</option>';
    }
    $html .= '</select>';
    $html .= ' <input type="submit" name="submit" value="submit">';
    $html .= '</p></form></div>';

    return $html;
}

/**
 * Extract the final HTTP status code from a list of response header lines.
 *
 * When redirects were followed the list holds several status lines; the last
 * one belongs to the response whose body was actually returned.
 *
 * @param mixed $headers Header lines as found in $http_response_header.
 * @return int|null The status code, or null when no status line is present.
 */
function dplaHttpStatus($headers) {
    $status = null;
    if (!is_array($headers)) {
        return $status;
    }
    foreach ($headers as $line) {
        if (is_string($line) && preg_match('#^HTTP/\S+\s+(\d{3})#', $line, $match) === 1) {
            $status = (int) $match[1];
        }
    }
    return $status;
}

/**
 * Escape a scalar value for use in HTML text or a quoted attribute.
 *
 * @param mixed $value Value to escape; non-scalars yield ''.
 * @return string
 */
function dplaEscape($value) {
    if (!is_scalar($value)) {
        return '';
    }
    return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}

/**
 * Normalise a response field that may be a single value, a list, or absent
 * into a list of non-empty, HTML-escaped strings.
 *
 * @param mixed $value Field value from the decoded response.
 * @return array List of escaped strings (possibly empty).
 */
function dplaFieldValues($value) {
    if ($value === null) {
        return array();
    }
    $items = is_array($value) ? $value : array($value);
    $clean = array();
    foreach ($items as $item) {
        $escaped = dplaEscape($item);
        if ($escaped !== '') {
            $clean[] = $escaped;
        }
    }
    return $clean;
}

/**
 * Render one search result (an entry of the response's docs list) as HTML.
 *
 * @param mixed $result Decoded result object.
 * @return string HTML for the result.
 */
function dplaRenderResult($result) {
    // isset() on the nested path is safe even when intermediate properties are missing.
    $titles = dplaFieldValues(isset($result->sourceResource->title) ? $result->sourceResource->title : null);
    $descriptions = dplaFieldValues(isset($result->sourceResource->description) ? $result->sourceResource->description : null);
    $images = dplaFieldValues(isset($result->object) ? $result->object : null);
    $links = dplaFieldValues(isset($result->isShownAt) ? $result->isShownAt : null);

    $html = '<div class="clear">';
    $html .= '<h2>' . implode(' ', $titles) . '</h2>';
    if (count($images) > 0) {
        $html .= '<img src="' . $images[0] . '" alt="" class="alignleft" />';
    }
    foreach ($descriptions as $description) {
        $html .= '<p>' . $description . '</p>';
    }
    if (count($links) > 0) {
        $html .= '<p><a href="' . $links[0] . '">View at source</a></p>';
    }
    $html .= '</div>';
    return $html;
}
| ?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment