Skip to content

Instantly share code, notes, and snippets.

@robwent
Created June 16, 2017 21:10
Show Gist options
  • Save robwent/4b22022db471d961e04f7ebec2091ef6 to your computer and use it in GitHub Desktop.
Save robwent/4b22022db471d961e04f7ebec2091ef6 to your computer and use it in GitHub Desktop.
PHP script to read Country and Zip information from a CSV file, look up the address from the Nominatim API and save the data to a new file including the new information.
<?php
// Turn off all error reporting
// We don't want notices screwing up and responses
error_reporting(0);
$file = 'mailing_list.csv';
$resultfile = 'mailing_list_nominatim_location.csv';
$start = 1;
$chunk = 20;
function toCSV($data, $outstream, $header=true) {
if (count($data)) {
if ($header) {
fputcsv($outstream, array_keys($data[0]));
}
foreach ($data as $row) {
fputcsv($outstream, $row);
}
}
}
function geocode($address){
// google map geocode api url
$url = "http://nominatim.openstreetmap.org/search?{$address}&format=json&&addressdetails=1";
// get the json response
$resp_json = file_get_contents($url);
// decode the json
$resp = json_decode($resp_json, true);
// get the important data
$lat = !empty($resp[0]['lat']) ? $resp[0]['lat'] : '';
$lng = !empty($resp[0]['lon']) ? $resp[0]['lon'] : '';
$formatted_address = !empty($resp[0]['display_name']) ? $resp[0]['display_name'] : '';
$state = !empty($resp[0]['address']['state']) ? $resp[0]['address']['state'] : '';
$county = !empty($resp[0]['address']['county']) ? $resp[0]['address']['county'] : '';
$city = !empty($resp[0]['address']['city']) ? $resp[0]['address']['city'] : '';
$village = !empty($resp[0]['address']['village']) ? $resp[0]['address']['village'] : '';
$hamlet = !empty($resp[0]['address']['hamlet']) ? $resp[0]['address']['hamlet'] : '';
// put the data in the array
$data_arr = array(
'lat' => $lat,
'lng' => $lng,
'formattedaddress' => $formatted_address,
'state' => $state,
'county' => $county,
'city' => $city,
'village' => $village,
'hamlet' => $hamlet
);
return $data_arr;
}
function _combine_array(&$row, $key, $header) {
$row = array_combine($header, $row);
}
function process($start=1, $chunk=20, $file=null, $resultfile=null) {
$csv = array_map('str_getcsv', file($file));
$header = array_shift($csv);
array_walk($csv, '_combine_array', $header);
$total_rows = count($csv);
// If the last number to be processed is larger than the total then this is the last chunk
$continue = (($start + $chunk) > $total_rows) ? false : true;
//Cut out the rows we want to process
$lines_to_process = array_slice($csv, $start, $chunk);
// CHange these if you headers are different
$zipHeader = 'postal code';
$countryHeader = 'country';
foreach ($lines_to_process as $line) {
$addressString = '';
if ($line[$countryHeader] !== '') {
$addressString .= 'country='.$line[$countryHeader];
}
if ($line[$zipHeader] !== '') {
if ($line[$countryHeader] !== '') {
$addressString .= '&';
}
$addressString .= 'postalcode='.$line[$zipHeader];
}
if ($addressString === '') {
// The file is missing address details for this row.
$line = array_merge($line, array('lat' => '',
'lng' => '',
'formattedaddress' => '',
'state' => '',
'county' => '',
'city' => '',
'village' => '',
'hamlet' => ''
));
} else {
// Use the geocode function with the address string
$loc = geocode($addressString);
if (is_array($loc)) {
// We have results
$line = array_merge($line, $loc);
} else {
// No results so add blank fields
$line = array_merge($line, array('lat' => '',
'lng' => '',
'formattedaddress' => '',
'state' => '',
'county' => '',
'city' => '',
'village' => '',
'hamlet' => ''
));
}
}
$output[] = $line;
//Nominim allows one lookup per second so we need to wait iif we don't want to get blocked
sleep(1);
}
// 'w' writes the file wiping any content, 'a' appends to the existing file, keeping previous content.
// If we are appending then we don't want to write the headers again.
$method = $start === 1 ? 'w' : 'a';
$header = $start === 1 ? true : false;
toCSV($output, fopen($resultfile, $method), $header);
return json_encode(array(
'success' => true,
'results' => $output,
'next' => $continue,
'start' => $start,
'chunk' => $chunk
));
}
if(isset($_POST['action']) && !empty($_POST['action'])) {
$action = $_POST['action'];
if(isset($_POST['start']) && !empty($_POST['start'])) {
$start = (int)$_POST['start'];
}
if(isset($_POST['chunk']) && !empty($_POST['chunk'])) {
$chunk = (int)$_POST['chunk'];
}
if($action == 'process') {
echo process($start, $chunk, $file, $resultfile);
exit();
}
}
?>
<!DOCTYPE html>
<html>
<head>
<title>Bootstrap 3 Template</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!-- Bootstrap core CSS -->
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0-alpha.6/css/bootstrap.min.css" integrity="sha384-rwoIResjU2yc3z8GV/NPeZWAv56rSmLldC3R/AZzGRnGxQQKnKkoFVhFQhNUwEyJ" crossorigin="anonymous">
<!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries -->
<!--[if lt IE 9]>
<script src="http://cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.js"></script>
<script src="http://cdnjs.cloudflare.com/ajax/libs/respond.js/1.4.2/respond.js"></script>
<![endif]-->
</head>
<body>
<div class="container">
<?php if (file_exists('mailing_list.csv')) : ?>
<p class="mt-3">CSV file found....<br>Click the button to begin.</p>
<button id="start" class="btn btn-large btn-primary">GO!</button>
<?php else : ?>
<div class="alert alert-error">
<p>Cannot find a mailing_list.csv file!</p>
</div>
<?php endif; ?>
<div id="results"></div>
</div>
<!-- Bootstrap core JavaScript
================================================== -->
<!-- Placed at the end of the document so the pages load faster -->
<script src="https://code.jquery.com/jquery-3.1.1.min.js" crossorigin="anonymous"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/tether/1.4.0/js/tether.min.js" integrity="sha384-DztdAPBWPRXSA/3eYEEUWrWCy7G5KFbe8fFjk5JAIxUYHKkDx6Qin1DkWx51bBrb" crossorigin="anonymous"></script>
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0-alpha.6/js/bootstrap.min.js" integrity="sha384-vBWWzlZJ8ea9aCX4pEW3rVHjgjt7zpkNpZk+02D9phzyeVkE+jo0ieGizqPLForn" crossorigin="anonymous"></script>
<script>
$(document).ready(function($) {
var line = 1;
var chunksize = 20;
function process(start, chunksize) {
if (line > 1) {
$('#results').append('Done!<br>');
}
$('#results').append('processing lines ' + line + ' to ' + (line + chunksize - 1) + '...');
$.ajax({
type: 'post',
url: window.location.href,
data: {
action: 'process',
start: line,
chunk: chunksize
},
success: function(response) {
var data = JSON.parse(response);
//console.log(response);
if (data.success) {
if (data.next) {
line = line + chunksize;
console.log('new start:' + line);
process(line, chunksize);
} else {
$('#results').append('<p>Finished!</p>');
$("#start").html('Done!').removeClass('btn-warning').addClass('btn-success');
}
}
}
});
}
$('#start').click(function() {
$("#start").html('Working...').removeClass('btn-primary').addClass('btn-warning').attr('disabled', true);
process(line, chunksize);
})
});
</script>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment