Created
June 16, 2017 21:10
-
-
Save robwent/4b22022db471d961e04f7ebec2091ef6 to your computer and use it in GitHub Desktop.
PHP script to read Country and Zip information from a CSV file, look up the address from the Nominatim API and save the data to a new file including the new information.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// Turn off all error reporting | |
// We don't want notices screwing up and responses | |
error_reporting(0); | |
$file = 'mailing_list.csv'; | |
$resultfile = 'mailing_list_nominatim_location.csv'; | |
$start = 1; | |
$chunk = 20; | |
function toCSV($data, $outstream, $header=true) { | |
if (count($data)) { | |
if ($header) { | |
fputcsv($outstream, array_keys($data[0])); | |
} | |
foreach ($data as $row) { | |
fputcsv($outstream, $row); | |
} | |
} | |
} | |
function geocode($address){ | |
// google map geocode api url | |
$url = "http://nominatim.openstreetmap.org/search?{$address}&format=json&&addressdetails=1"; | |
// get the json response | |
$resp_json = file_get_contents($url); | |
// decode the json | |
$resp = json_decode($resp_json, true); | |
// get the important data | |
$lat = !empty($resp[0]['lat']) ? $resp[0]['lat'] : ''; | |
$lng = !empty($resp[0]['lon']) ? $resp[0]['lon'] : ''; | |
$formatted_address = !empty($resp[0]['display_name']) ? $resp[0]['display_name'] : ''; | |
$state = !empty($resp[0]['address']['state']) ? $resp[0]['address']['state'] : ''; | |
$county = !empty($resp[0]['address']['county']) ? $resp[0]['address']['county'] : ''; | |
$city = !empty($resp[0]['address']['city']) ? $resp[0]['address']['city'] : ''; | |
$village = !empty($resp[0]['address']['village']) ? $resp[0]['address']['village'] : ''; | |
$hamlet = !empty($resp[0]['address']['hamlet']) ? $resp[0]['address']['hamlet'] : ''; | |
// put the data in the array | |
$data_arr = array( | |
'lat' => $lat, | |
'lng' => $lng, | |
'formattedaddress' => $formatted_address, | |
'state' => $state, | |
'county' => $county, | |
'city' => $city, | |
'village' => $village, | |
'hamlet' => $hamlet | |
); | |
return $data_arr; | |
} | |
function _combine_array(&$row, $key, $header) { | |
$row = array_combine($header, $row); | |
} | |
function process($start=1, $chunk=20, $file=null, $resultfile=null) { | |
$csv = array_map('str_getcsv', file($file)); | |
$header = array_shift($csv); | |
array_walk($csv, '_combine_array', $header); | |
$total_rows = count($csv); | |
// If the last number to be processed is larger than the total then this is the last chunk | |
$continue = (($start + $chunk) > $total_rows) ? false : true; | |
//Cut out the rows we want to process | |
$lines_to_process = array_slice($csv, $start, $chunk); | |
// CHange these if you headers are different | |
$zipHeader = 'postal code'; | |
$countryHeader = 'country'; | |
foreach ($lines_to_process as $line) { | |
$addressString = ''; | |
if ($line[$countryHeader] !== '') { | |
$addressString .= 'country='.$line[$countryHeader]; | |
} | |
if ($line[$zipHeader] !== '') { | |
if ($line[$countryHeader] !== '') { | |
$addressString .= '&'; | |
} | |
$addressString .= 'postalcode='.$line[$zipHeader]; | |
} | |
if ($addressString === '') { | |
// The file is missing address details for this row. | |
$line = array_merge($line, array('lat' => '', | |
'lng' => '', | |
'formattedaddress' => '', | |
'state' => '', | |
'county' => '', | |
'city' => '', | |
'village' => '', | |
'hamlet' => '' | |
)); | |
} else { | |
// Use the geocode function with the address string | |
$loc = geocode($addressString); | |
if (is_array($loc)) { | |
// We have results | |
$line = array_merge($line, $loc); | |
} else { | |
// No results so add blank fields | |
$line = array_merge($line, array('lat' => '', | |
'lng' => '', | |
'formattedaddress' => '', | |
'state' => '', | |
'county' => '', | |
'city' => '', | |
'village' => '', | |
'hamlet' => '' | |
)); | |
} | |
} | |
$output[] = $line; | |
//Nominim allows one lookup per second so we need to wait iif we don't want to get blocked | |
sleep(1); | |
} | |
// 'w' writes the file wiping any content, 'a' appends to the existing file, keeping previous content. | |
// If we are appending then we don't want to write the headers again. | |
$method = $start === 1 ? 'w' : 'a'; | |
$header = $start === 1 ? true : false; | |
toCSV($output, fopen($resultfile, $method), $header); | |
return json_encode(array( | |
'success' => true, | |
'results' => $output, | |
'next' => $continue, | |
'start' => $start, | |
'chunk' => $chunk | |
)); | |
} | |
if(isset($_POST['action']) && !empty($_POST['action'])) { | |
$action = $_POST['action']; | |
if(isset($_POST['start']) && !empty($_POST['start'])) { | |
$start = (int)$_POST['start']; | |
} | |
if(isset($_POST['chunk']) && !empty($_POST['chunk'])) { | |
$chunk = (int)$_POST['chunk']; | |
} | |
if($action == 'process') { | |
echo process($start, $chunk, $file, $resultfile); | |
exit(); | |
} | |
} | |
?> | |
<!DOCTYPE html> | |
<html> | |
<head> | |
<title>Bootstrap 3 Template</title> | |
<meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
<!-- Bootstrap core CSS --> | |
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0-alpha.6/css/bootstrap.min.css" integrity="sha384-rwoIResjU2yc3z8GV/NPeZWAv56rSmLldC3R/AZzGRnGxQQKnKkoFVhFQhNUwEyJ" crossorigin="anonymous"> | |
<!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries --> | |
<!--[if lt IE 9]> | |
<script src="http://cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.js"></script> | |
<script src="http://cdnjs.cloudflare.com/ajax/libs/respond.js/1.4.2/respond.js"></script> | |
<![endif]--> | |
</head> | |
<body> | |
<div class="container"> | |
<?php if (file_exists('mailing_list.csv')) : ?> | |
<p class="mt-3">CSV file found....<br>Click the button to begin.</p> | |
<button id="start" class="btn btn-large btn-primary">GO!</button> | |
<?php else : ?> | |
<div class="alert alert-error"> | |
<p>Cannot find a mailing_list.csv file!</p> | |
</div> | |
<?php endif; ?> | |
<div id="results"></div> | |
</div> | |
<!-- Bootstrap core JavaScript | |
================================================== --> | |
<!-- Placed at the end of the document so the pages load faster --> | |
<script src="https://code.jquery.com/jquery-3.1.1.min.js" crossorigin="anonymous"></script> | |
<script src="https://cdnjs.cloudflare.com/ajax/libs/tether/1.4.0/js/tether.min.js" integrity="sha384-DztdAPBWPRXSA/3eYEEUWrWCy7G5KFbe8fFjk5JAIxUYHKkDx6Qin1DkWx51bBrb" crossorigin="anonymous"></script> | |
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0-alpha.6/js/bootstrap.min.js" integrity="sha384-vBWWzlZJ8ea9aCX4pEW3rVHjgjt7zpkNpZk+02D9phzyeVkE+jo0ieGizqPLForn" crossorigin="anonymous"></script> | |
<script> | |
$(document).ready(function($) { | |
var line = 1; | |
var chunksize = 20; | |
function process(start, chunksize) { | |
if (line > 1) { | |
$('#results').append('Done!<br>'); | |
} | |
$('#results').append('processing lines ' + line + ' to ' + (line + chunksize - 1) + '...'); | |
$.ajax({ | |
type: 'post', | |
url: window.location.href, | |
data: { | |
action: 'process', | |
start: line, | |
chunk: chunksize | |
}, | |
success: function(response) { | |
var data = JSON.parse(response); | |
//console.log(response); | |
if (data.success) { | |
if (data.next) { | |
line = line + chunksize; | |
console.log('new start:' + line); | |
process(line, chunksize); | |
} else { | |
$('#results').append('<p>Finished!</p>'); | |
$("#start").html('Done!').removeClass('btn-warning').addClass('btn-success'); | |
} | |
} | |
} | |
}); | |
} | |
$('#start').click(function() { | |
$("#start").html('Working...').removeClass('btn-primary').addClass('btn-warning').attr('disabled', true); | |
process(line, chunksize); | |
}) | |
}); | |
</script> | |
</body> | |
</html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment