Created
April 8, 2015 19:51
-
-
Save zemd/12727187f308919f964a to your computer and use it in GitHub Desktop.
Parsing data from geonames.org into MongoDB
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace Opesho\CommonBundle\Command; | |
use MongoClient; | |
use MongoDate; | |
use MongoDB; | |
use SplStack; | |
use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand; | |
use Symfony\Component\Console\Input\InputArgument; | |
use Symfony\Component\Console\Input\InputInterface; | |
use Symfony\Component\Console\Input\InputOption; | |
use Symfony\Component\Console\Output\OutputInterface; | |
/**
 * Console command that imports geonames.org dump files into MongoDB.
 *
 * It reads three files from the folder given as argument:
 *  - allCountries.txt   -> countries (PCLI), states (ADM1/ADM2) and cities (PPL*)
 *  - alternateNames.txt -> "alternative_names" of the imported documents
 *  - countryInfo.txt    -> ascii name, locale and languages of countries
 *
 * Individual steps can be disabled with --skip. Keys read by this class:
 * countries, countries_names, countries_info, states, states_<featureCode>,
 * states_names, cities, cities_<featureCode>, cities_names.
 *
 * The --refresh option is declared but not read anywhere in this class.
 *
 * NOTE: the code targets the legacy ext/mongo driver (MongoClient), so it is
 * deliberately kept free of PHP 7+ syntax.
 */
class ParseGeonamesCommand extends ContainerAwareCommand
{
    /** Number of tab-separated columns in allCountries.txt and countryInfo.txt rows. */
    const GEONAME_COLUMNS = 19;

    /** Minimum number of tab-separated columns in alternateNames.txt rows. */
    const ALTERNATE_NAME_COLUMNS = 8;

    /** @var MongoDB $db */
    protected $db;

    /** @var OutputInterface */
    protected $output;

    /** @var \MongoCollection */
    protected $countries;

    /** @var \MongoCollection */
    protected $states;

    /** @var \MongoCollection */
    protected $cities;

    /** @var SplStack stack of start timestamps pushed by timeStart() */
    protected $time;

    /** @var array geonameid => geonameid of every country seen during this run */
    protected $cacheCountries = array();

    /** @var array geonameid => geonameid of every state imported during this run */
    protected $cacheStates = array();

    /** @var array geonameid => geonameid of every city imported during this run */
    protected $cacheCities = array();

    /** @var array values of the --skip option */
    protected $skip = array();

    /** @var array country code => DBRef array (or null when the country is unknown) */
    private $countryRefs = array();

    /** @var array "country\tadmin1" => DBRef array (or null when the state is unknown) */
    private $stateRefs = array();

    /** @var array country code => locale (or null) */
    private $countryLocales = array();

    /** @var array warning message => number of times it has been printed */
    private $warnCounts = array();

    /** @var string spinner character printed by the previous tick() */
    private $spinnerChar = '/';

    /** @var string last padded status line printed by tick() */
    private $lastTick = '';

    /** @var float last peak memory (MB) reported by memory() */
    private $lastMemoryUsage = 0.0;

    /**
     * @see Command
     */
    protected function configure()
    {
        $this
            ->setName('opesho:geonames:parse')
            ->setDescription('Populating countries')
            ->setDefinition(array(
                new InputArgument('folder', InputArgument::REQUIRED, "Folder with geonames files"),
                new InputOption("skip", null, InputOption::VALUE_REQUIRED | InputOption::VALUE_IS_ARRAY, "You can skip each step of population data"),
                new InputOption("refresh", null, InputOption::VALUE_NONE, "Refresh alternative names")
            ))
            ->setHelp(<<<EOT
The <info>opesho:geonames:parse</info> command populates data for countries and cities:

  <info>php app/console opesho:geonames:parse /path/to/geonames/dumps</info>
EOT
            );
    }

    /**
     * Tells whether a step key was passed through --skip.
     *
     * @param string $key
     *
     * @return bool
     */
    protected function isSkipped($key)
    {
        return in_array($key, $this->skip, true);
    }

    /**
     * Runs the whole import: countries, states, cities, then the derived
     * "preferred_name" fields.
     *
     * @param InputInterface  $input
     * @param OutputInterface $output
     *
     * @throws \InvalidArgumentException when the folder argument does not resolve to a directory
     * @throws \RuntimeException         when a required dump file cannot be opened
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $this->output = $output;
        $this->skip = (array) $input->getOption('skip');

        $folder = realpath($input->getArgument('folder'));
        if ($folder === false || !is_dir($folder)) {
            throw new \InvalidArgumentException(
                sprintf('Folder "%s" does not exist.', $input->getArgument('folder'))
            );
        }

        $allCountries = $folder . DIRECTORY_SEPARATOR . 'allCountries.txt';
        $alternativeNames = $folder . DIRECTORY_SEPARATOR . 'alternateNames.txt';
        $countryInfo = $folder . DIRECTORY_SEPARATOR . 'countryInfo.txt';

        $m = new MongoClient($this->getContainer()->getParameter('mongodb_server'));
        $this->db = $m->custom_db; // <---- REPLACE WITH YOUR DATABASE NAME
        $this->countries = $this->db->countries;
        $this->states = $this->db->states;
        $this->cities = $this->db->cities;

        // COUNTRIES ~193
        $this->importFeatures("Inserting countries...", $allCountries, "A", "PCLI", 'proceedCountries');
        $this->importNames("Updating countries' names", 'countries_names', $alternativeNames, 'saveCountryNames');

        $this->timeStart();
        $this->info("Updating countries' info...");
        if ($this->isSkipped("countries_info")) {
            $this->warn("Updating countries' info is skipped");
        } else {
            $this->populateCountryInfo($countryInfo, array($this, 'saveCountryInfo'));
        }
        $this->timeEnd();

        // The country ids are only needed while importing country names.
        $this->cacheCountries = array();

        // STATES - REGIONS ~153 562
        $this->importFeatures("Inserting states ADM1...", $allCountries, "A", "ADM1", 'proceedStates');
        $this->importFeatures("Inserting states ADM2...", $allCountries, "A", "ADM2", 'proceedStates');
        $this->importNames("Updating states' names", 'states_names', $alternativeNames, 'saveStatesNames');

        // CITIES
        $this->importFeatures("Inserting capitals", $allCountries, "P", "PPLC", 'proceedCities');
        $this->importFeatures("Inserting first-order administrative division", $allCountries, "P", "PPLA", 'proceedCities');
        $this->importFeatures("Inserting second-order administrative division", $allCountries, "P", "PPLA2", 'proceedCities');
        $this->importFeatures("Inserting third-order administrative division", $allCountries, "P", "PPLA3", 'proceedCities');
        $this->importFeatures("Inserting fourth-order administrative division", $allCountries, "P", "PPLA4", 'proceedCities');
        $this->importFeatures("Inserting populated places (population >= 100000)", $allCountries, "P", "PPL", 'proceedCities', 100000);
        // Honours --skip=cities_names, like the matching prepared-name step below.
        $this->importNames("Updating cities' names", 'cities_names', $alternativeNames, 'saveCitiesNames');

        // ADDITIONAL: derive "preferred_name" from the alternative names.
        $preparedSteps = array(
            'countries_names' => array("Updating countries' prepared names...", 'assignCountryPreparedName'),
            'states_names' => array("Updating states' prepared names...", 'assignStatePreparedName'),
            'cities_names' => array("Updating cities' prepared names...", 'assignCityPreparedName'),
        );
        foreach ($preparedSteps as $skipKey => $step) {
            if ($this->isSkipped($skipKey)) {
                continue;
            }
            list($message, $method) = $step;
            $this->timeStart();
            $this->info($message);
            $this->$method();
            $this->timeEnd();
        }

        $this->cacheStates = array();
        $this->cacheCities = array();

        // tick() leaves the cursor at column 0; overwrite the last spinner character.
        echo " ";
    }

    /**
     * Timed wrapper around populate() for one feature class/code pair.
     *
     * @param string    $message         progress message printed before the scan
     * @param string    $file            path to allCountries.txt
     * @param string    $featureClass    geonames feature class to keep
     * @param string    $featureCode     geonames feature code to keep
     * @param string    $handler         name of the proceed* method receiving each row
     * @param int|false $populationLimit minimum population, or false for no limit
     */
    private function importFeatures($message, $file, $featureClass, $featureCode, $handler, $populationLimit = false)
    {
        $this->timeStart();
        $this->info($message);
        $this->populate($file, $featureClass, $featureCode, array($this, $handler), $populationLimit);
        $this->timeEnd();
    }

    /**
     * Timed, skippable wrapper around populateNames().
     *
     * @param string $label   step label; printed as "<label>..." or "<label> is skipped"
     * @param string $skipKey --skip key that disables the step
     * @param string $file    path to alternateNames.txt
     * @param string $handler name of the save*Names method receiving each row
     */
    private function importNames($label, $skipKey, $file, $handler)
    {
        $this->timeStart();
        $this->info("{$label}...");
        if ($this->isSkipped($skipKey)) {
            $this->warn("{$label} is skipped");
        } else {
            $this->populateNames($file, array($this, $handler));
        }
        $this->timeEnd();
    }

    /**
     * Upserts one city row of allCountries.txt (parameters follow the file's
     * column order) and remembers its id for the alternative-name steps.
     *
     * Does nothing when "cities" or "cities_<featureCode>" is skipped.
     */
    protected function proceedCities(
        $geonameid,
        $name,
        $asciiName,
        $alternateNames,
        $latitude,
        $longitude,
        $featureClass,
        $featureCode,
        $countryCode,
        $cc2,
        $admin1Code,
        $admin2Code,
        $admin3Code,
        $admin4Code,
        $population,
        $elevation,
        $dem,
        $timezone,
        $modificationDate
    ) {
        if ($this->isSkipped("cities") || $this->isSkipped("cities_{$featureCode}")) {
            return;
        }

        $geonameid = intval($geonameid);
        $date = new \DateTime($modificationDate, new \DateTimeZone('UTC'));
        $this->cacheCities[$geonameid] = $geonameid;

        $this->cities->update(
            array('geonameid' => $geonameid),
            array(
                '$set' => array(
                    'geonameid' => $geonameid,
                    'name' => $name,
                    'ascii_name' => $asciiName,
                    'latitude' => floatval($latitude),
                    'longitude' => floatval($longitude),
                    // Capitals sort before every other city.
                    'order' => $featureCode == 'PPLC' ? 0 : 500,
                    'modified_at' => new MongoDate($date->getTimestamp()),
                    'country_code' => $countryCode,
                    'country' => $this->countryRef($countryCode),
                    'states' => array($admin1Code, $admin2Code, $admin3Code, $admin4Code),
                    'state' => $this->stateRef($countryCode, $admin1Code),
                    'feature_code' => $featureCode,
                    'population' => intval($population)
                )
            ),
            array('upsert' => true)
        );
    }

    /**
     * Returns (and memoizes) the DBRef of a country, or null when no country
     * document with that code exists.
     *
     * @param string $countryCode
     *
     * @return array|null
     */
    private function countryRef($countryCode)
    {
        if (!array_key_exists($countryCode, $this->countryRefs)) {
            $country = $this->countries->findOne(array('country_code' => $countryCode), array('_id' => true));
            $this->countryRefs[$countryCode] = $country === null
                ? null
                : \MongoDBRef::create('countries', $country['_id']);
        }

        return $this->countryRefs[$countryCode];
    }

    /**
     * Returns (and memoizes) the DBRef of the first-order state a city belongs
     * to, or null when it is unknown.
     *
     * The lookup uses the fields proceedStates() actually writes
     * ("admin1_code" and "feature_code").
     *
     * @param string $countryCode
     * @param string $admin1Code
     *
     * @return array|null
     */
    private function stateRef($countryCode, $admin1Code)
    {
        $key = "{$countryCode}\t{$admin1Code}";
        if (!array_key_exists($key, $this->stateRefs)) {
            $state = $this->states->findOne(
                array(
                    'country_code' => $countryCode,
                    'admin1_code' => $admin1Code,
                    'feature_code' => 'ADM1'
                ),
                array('_id' => true)
            );
            $this->stateRefs[$key] = $state === null
                ? null
                : \MongoDBRef::create('states', $state['_id']);
        }

        return $this->stateRefs[$key];
    }

    /**
     * Upserts one administrative-division row of allCountries.txt (parameters
     * follow the file's column order) and remembers its id.
     *
     * Does nothing, apart from a one-time warning, when "states" or
     * "states_<featureCode>" is skipped.
     */
    protected function proceedStates(
        $geonameid,
        $name,
        $asciiName,
        $alternateNames,
        $latitude,
        $longitude,
        $featureClass,
        $featureCode,
        $countryCode,
        $cc2,
        $admin1Code,
        $admin2Code,
        $admin3Code,
        $admin4Code,
        $population,
        $elevation,
        $dem,
        $timezone,
        $modificationDate
    ) {
        if ($this->isSkipped("states")) {
            $this->warn("Skipping states...", 1);
            return;
        }
        if ($this->isSkipped("states_{$featureCode}")) {
            $this->warn("Skipping states {$featureCode}", 1);
            return;
        }

        $geonameid = intval($geonameid);
        $this->cacheStates[$geonameid] = $geonameid;

        $this->states->update(
            array('geonameid' => $geonameid),
            array(
                '$set' => array(
                    'geonameid' => $geonameid,
                    'name' => $name,
                    'ascii_name' => $asciiName,
                    'country_code' => $countryCode,
                    'admin1_code' => $admin1Code,
                    'admin2_code' => $admin2Code,
                    'admin3_code' => $admin3Code,
                    'admin4_code' => $admin4Code,
                    'feature_code' => $featureCode
                )
            ),
            array('upsert' => true)
        );
    }

    /**
     * Pushes the current time on the timer stack.
     */
    private function timeStart()
    {
        if ($this->time === null) {
            $this->time = new SplStack();
        }
        $this->time->push(microtime(true));
    }

    /**
     * Pops the most recent start time and prints the elapsed minutes.
     */
    private function timeEnd()
    {
        $executionTime = (microtime(true) - $this->time->pop()) / 60;
        $this->debug("Operation executed in: <info>{$executionTime}</info> mins.");
    }

    /**
     * @param string $message
     */
    private function debug($message)
    {
        $this->output->writeln("<comment>[DEBUG]</comment> {$message}");
    }

    /**
     * @param string $message
     */
    private function info($message)
    {
        $this->output->writeln("<info>[INFO]</info> {$message}");
    }

    /**
     * Prints a warning; when $times is positive the same message is printed
     * at most $times times.
     *
     * @param string $message
     * @param int    $times   0 means "no limit"
     */
    private function warn($message, $times = 0)
    {
        $shown = isset($this->warnCounts[$message]) ? $this->warnCounts[$message] : 0;
        if ($times > 0 && $shown === $times) {
            return;
        }
        $this->warnCounts[$message] = $shown + 1;
        $this->output->writeln("<error>[WARN]</error> {$message}");
    }

    /**
     * Stores ascii name, locale and languages for one countryInfo.txt row
     * (parameters follow the file's column order; $languages is an array).
     *
     * The locale is the first two characters of the first language entry,
     * or an empty string when the country lists no language.
     */
    protected function saveCountryInfo(
        $iso,
        $iso3,
        $isoNumeric,
        $fips,
        $countryName,
        $capital,
        $area,
        $population,
        $continent,
        $tld,
        $currencyCode,
        $currencyName,
        $phone,
        $postalCodeFormat,
        $postCodeRegex,
        $languages,
        $geonameid,
        $neighbors,
        $equivalentFipsCode
    ) {
        $primaryLanguage = isset($languages[0]) ? $languages[0] : '';

        $this->countries->update(
            array('geonameid' => intval($geonameid)),
            array(
                '$set' => array(
                    'ascii_name' => $countryName,
                    // Cast: substr() returns false for an empty string on PHP 5.
                    'locale' => (string) substr($primaryLanguage, 0, 2),
                    'languages' => $languages
                )
            )
        );
    }

    /**
     * Advances the console spinner, optionally followed by a status text.
     * Writes directly to stdout and returns the cursor to column 0.
     *
     * @param string $text
     */
    private function tick($text = "")
    {
        // The leading and trailing backslash make the "next character" lookup wrap around.
        $frames = '\\|/-\\';
        $this->spinnerChar = substr($frames, strpos($frames, $this->spinnerChar) + 1, 1);

        if (trim($this->lastTick) === '' && empty($text)) {
            echo "{$this->spinnerChar}\r";
            return;
        }

        // Pad to the previous width so that a shorter line erases the longer one.
        $line = str_pad("{$text} {$this->spinnerChar}", strlen($this->lastTick));
        echo "{$line}\r";
        $this->lastTick = $line;
    }

    /**
     * Prints the peak memory usage whenever it grew by more than 0.1 MB
     * since the last report.
     */
    private function memory()
    {
        $memory = memory_get_peak_usage() / 1024 / 1024;
        if ($memory - $this->lastMemoryUsage > 0.1) {
            $this->debug("Memory used: <info>{$memory}</info> MB");
            $this->lastMemoryUsage = $memory;
        }
    }

    /**
     * Opens a dump file for reading.
     *
     * @param string $path
     *
     * @return resource
     *
     * @throws \RuntimeException when the file is missing or unreadable
     */
    private function openFile($path)
    {
        $handle = is_readable($path) ? fopen($path, 'r') : false;
        if ($handle === false) {
            throw new \RuntimeException(sprintf('Unable to open "%s" for reading.', $path));
        }

        return $handle;
    }

    /**
     * Streams countryInfo.txt and invokes $callback once per country row with
     * the 19 columns as arguments; the languages column is passed as an array.
     *
     * Comment lines (starting with "#") and blank lines are ignored.
     *
     * @param string   $file
     * @param callable $callback
     *
     * @throws \RuntimeException when the file cannot be opened
     */
    protected function populateCountryInfo($file, $callback)
    {
        $handle = $this->openFile($file);
        try {
            while (($line = fgets($handle)) !== false) {
                $line = rtrim($line, "\r\n");
                if ($line === '' || $line[0] == '#') {
                    continue;
                }

                $row = array_slice(
                    array_pad(explode("\t", $line), self::GEONAME_COLUMNS, ''),
                    0,
                    self::GEONAME_COLUMNS
                );
                // Column 15 holds the comma-separated language list.
                $row[15] = explode(',', $row[15]);

                $this->tick();
                call_user_func_array($callback, $row);
                $this->memory();
            }
        } finally {
            fclose($handle);
        }
    }

    /**
     * Returns (and memoizes) the locale stored on a country document, or null
     * when the country or its locale is missing.
     *
     * @param string $countryCode
     *
     * @return string|null
     */
    private function countryLocale($countryCode)
    {
        if (!array_key_exists($countryCode, $this->countryLocales)) {
            $country = $this->countries->findOne(array('country_code' => $countryCode), array('locale' => true));
            $this->countryLocales[$countryCode] = isset($country['locale']) ? $country['locale'] : null;
        }

        return $this->countryLocales[$countryCode];
    }

    /**
     * Picks the display name out of a document's alternative names.
     *
     * Only names in the country locale or in English are considered.
     * Priority: 1. locale + preferred, 2. locale + not preferred,
     * 3. 'en' + preferred, 4. 'en' + not preferred.
     *
     * @param array|null  $alternatives "alternative_names" value of the document
     * @param string|null $locale       locale of the owning country
     *
     * @return string|null the chosen name, or null when there is no candidate
     */
    private function choosePreferredName($alternatives, $locale)
    {
        if (empty($alternatives) || !is_array($alternatives)) {
            return null;
        }

        $candidates = array();
        foreach ($alternatives as $alternative) {
            $this->tick();
            if ($alternative['locale'] !== $locale && $alternative['locale'] !== 'en') {
                continue;
            }
            $candidates[] = array(
                'name' => $alternative['name'],
                'locale' => $alternative['locale'],
                'is_preferred_name' => $alternative['is_preferred_name']
            );
            // Nothing can outrank a preferred name in the country locale.
            if ($alternative['is_preferred_name'] && $alternative['locale'] === $locale) {
                break;
            }
        }

        if (empty($candidates)) {
            return null;
        }
        $this->sortPreferred($candidates, $locale);

        return $candidates[0]['name'];
    }

    /**
     * Sets "preferred_name" on every country that has a usable alternative name.
     *
     * ascii_name is set while populating country info, so it is not touched here.
     * NOTE: the original author flagged this method as leaking memory.
     */
    protected function assignCountryPreparedName()
    {
        foreach ($this->countries->find() as $country) {
            $alternatives = isset($country['alternative_names']) ? $country['alternative_names'] : null;
            $locale = isset($country['locale']) ? $country['locale'] : null;

            $preferredName = $this->choosePreferredName($alternatives, $locale);
            if ($preferredName === null) {
                continue;
            }

            $this->tick();
            $this->countries->update(
                array('geonameid' => $country['geonameid']),
                array('$set' => array('preferred_name' => $preferredName))
            );
            $this->memory();
        }
    }

    /**
     * Sets "preferred_name" on every state that has a usable alternative name.
     *
     * NOTE: the original author flagged this method as leaking memory.
     */
    protected function assignStatePreparedName()
    {
        $counter = 0;
        foreach ($this->states->find() as $state) {
            if (empty($state['alternative_names'])) {
                continue;
            }

            $preferredName = $this->choosePreferredName(
                $state['alternative_names'],
                $this->countryLocale($state['country_code'])
            );
            if ($preferredName === null) {
                continue;
            }

            $counter += 1;
            $this->tick("{$counter} states' names updated...");
            $this->states->update(
                array('geonameid' => $state['geonameid']),
                array('$set' => array('preferred_name' => $preferredName))
            );
            $this->memory();
        }
    }

    /**
     * Sets "preferred_name" on every city imported during this run that has a
     * usable alternative name.
     *
     * NOTE: the original author flagged this method as leaking memory.
     */
    protected function assignCityPreparedName()
    {
        // array_values(): the cache is keyed by id, "$in" needs a plain list.
        $cities = $this->cities->find(
            array('geonameid' => array('$in' => array_values($this->cacheCities)))
        );

        foreach ($cities as $city) {
            if (empty($city['alternative_names'])) {
                continue;
            }

            $preferredName = $this->choosePreferredName(
                $city['alternative_names'],
                $this->countryLocale($city['country_code'])
            );
            if ($preferredName === null) {
                continue;
            }

            $this->tick();
            $this->cities->update(
                array('geonameid' => $city['geonameid']),
                array('$set' => array('preferred_name' => $preferredName))
            );
            $this->memory();
        }
    }

    /**
     * Sorts name candidates in place, best candidate first.
     *
     * Candidates in $locale come before all others; within the same group
     * preferred names come first. Candidates of equal rank keep the original
     * tie-break (descending array comparison).
     *
     * @param array       $arr    list of arrays with name, locale, is_preferred_name
     * @param string|null $locale
     */
    private function sortPreferred(array &$arr, $locale)
    {
        $rank = function ($item) use ($locale) {
            return ($item['locale'] === $locale ? 0 : 2) + ($item['is_preferred_name'] ? 0 : 1);
        };

        usort($arr, function ($a, $b) use ($rank) {
            $left = $rank($a);
            $right = $rank($b);
            if ($left !== $right) {
                return $left < $right ? -1 : 1;
            }
            if ($a == $b) {
                return 0;
            }

            return $a > $b ? -1 : 1;
        });
    }

    /**
     * Streams alternateNames.txt and invokes $callback for every usable name
     * with (geonameid, id, isocode, name, isPreferredName, isShortName).
     *
     * Historic and colloquial names, names without a language code and
     * pseudo-languages (links, abbreviations, airport and postal codes) are
     * ignored. Rows for which the callback returns true are counted.
     *
     * @param string   $source
     * @param callable $callback
     *
     * @throws \RuntimeException when the file cannot be opened
     */
    protected function populateNames($source, $callback)
    {
        $ignoredCodes = array('link', 'abbr', 'fr_1793', 'faac', 'icao', 'iata', 'post');
        $counter = 0;

        $handle = $this->openFile($source);
        try {
            while (($line = fgets($handle)) !== false) {
                // Strip the line ending first: otherwise the last column never equals '1'.
                $data = array_pad(explode("\t", rtrim($line, "\r\n")), self::ALTERNATE_NAME_COLUMNS, '');
                list(
                    $id,
                    $geonameid,
                    $isocode,
                    $name,
                    $isPreferredName,
                    $isShortName,
                    $isColloquial,
                    $isHistoric
                ) = $data;

                if ($isHistoric == '1'
                    || $isColloquial == '1'
                    || empty($isocode)
                    || in_array($isocode, $ignoredCodes, true)
                ) {
                    continue;
                }

                $populated = call_user_func(
                    $callback,
                    $geonameid,
                    $id,
                    $isocode,
                    $name,
                    $isPreferredName,
                    $isShortName
                );
                if ($populated === true) {
                    $counter += 1;
                }

                $this->tick("Populated {$counter} entities");
                $this->memory();
            }
        } finally {
            fclose($handle);
        }

        $this->info("Populated {$counter} entities' names");
    }

    /**
     * Adds one alternative name to a document, provided the document was
     * imported during this run.
     *
     * @param \MongoCollection $collection
     * @param array            $cache      id-keyed cache of imported documents
     * @param int|string       $geonameid
     * @param string           $isocode
     * @param string           $name
     * @param string           $isPreferredName '1' when preferred
     * @param string           $isShortName     '1' when short
     *
     * @return true|null true when the name was stored, null otherwise
     */
    private function addAlternativeName($collection, $cache, $geonameid, $isocode, $name, $isPreferredName, $isShortName)
    {
        $geonameid = intval($geonameid);
        // Keyed lookup: this runs once per line of alternateNames.txt.
        if (!isset($cache[$geonameid])) {
            return null;
        }

        $collection->update(
            array('geonameid' => $geonameid),
            array(
                '$addToSet' => array(
                    'alternative_names' => array(
                        'locale' => strtolower($isocode),
                        'name' => $name,
                        'is_preferred_name' => $isPreferredName == '1',
                        'is_short_name' => $isShortName == '1'
                    )
                )
            )
        );

        return true;
    }

    /**
     * populateNames() callback storing alternative names of imported cities.
     *
     * @return true|null true when the name was stored
     */
    protected function saveCitiesNames(
        $geonameid,
        $id,
        $isocode,
        $name,
        $isPreferredName,
        $isShortName
    ) {
        return $this->addAlternativeName(
            $this->cities,
            $this->cacheCities,
            $geonameid,
            $isocode,
            $name,
            $isPreferredName,
            $isShortName
        );
    }

    /**
     * populateNames() callback storing alternative names of imported states.
     *
     * @return true|null true when the name was stored
     */
    protected function saveStatesNames(
        $geonameid,
        $id,
        $isocode,
        $name,
        $isPreferredName,
        $isShortName
    ) {
        return $this->addAlternativeName(
            $this->states,
            $this->cacheStates,
            $geonameid,
            $isocode,
            $name,
            $isPreferredName,
            $isShortName
        );
    }

    /**
     * populateNames() callback storing alternative names of countries.
     *
     * @return true|null true when the name was stored
     */
    protected function saveCountryNames(
        $geonameid,
        $id,
        $isocode,
        $name,
        $isPreferredName,
        $isShortName
    ) {
        return $this->addAlternativeName(
            $this->countries,
            $this->cacheCountries,
            $geonameid,
            $isocode,
            $name,
            $isPreferredName,
            $isShortName
        );
    }

    /**
     * Upserts one country row of allCountries.txt (parameters follow the
     * file's column order).
     *
     * The id is remembered even when "countries" is skipped, so that the
     * country-name step can still update existing documents.
     */
    protected function proceedCountries(
        $geonameid,
        $name,
        $asciiName,
        $alternateNames,
        $latitude,
        $longitude,
        $featureClass,
        $featureCode,
        $countryCode,
        $cc2,
        $admin1Code,
        $admin2Code,
        $admin3Code,
        $admin4Code,
        $population,
        $elevation,
        $dem,
        $timezone,
        $modificationDate
    ) {
        $geonameid = intval($geonameid);
        $this->cacheCountries[$geonameid] = $geonameid;

        if ($this->isSkipped('countries')) {
            return;
        }

        $date = new \DateTime($modificationDate, new \DateTimeZone('UTC'));

        $this->countries->update(
            array('geonameid' => $geonameid),
            array(
                '$set' => array(
                    'geonameid' => $geonameid,
                    'name' => $name,
                    'ascii_name' => $asciiName,
                    'country_code' => $countryCode,
                    'modified_at' => new MongoDate($date->getTimestamp()),
                    'states_included' => true,
                    'active' => true
                )
            ),
            array('upsert' => true)
        );
    }

    /**
     * Streams allCountries.txt and invokes $callback with the 19 columns of
     * every row matching the given feature class and code.
     *
     * @param string    $file
     * @param string    $paramFatureClass  feature class to keep (column 7 of the file)
     * @param string    $paramFeatureCode  feature code to keep (column 8 of the file)
     * @param callable  $callback
     * @param int|false $populationLimit   rows with a smaller population are ignored
     *
     * @throws \RuntimeException when the file cannot be opened
     */
    protected function populate($file, $paramFatureClass, $paramFeatureCode, $callback, $populationLimit = false)
    {
        $handle = $this->openFile($file);
        try {
            while (($line = fgets($handle)) !== false) {
                // Pad short rows so that the column accesses below are always defined.
                $data = array_pad(explode("\t", rtrim($line, "\r\n")), self::GEONAME_COLUMNS, '');

                if ($data[6] != $paramFatureClass || $data[7] != $paramFeatureCode) {
                    continue;
                }
                if ($populationLimit !== false && intval($data[14]) < $populationLimit) {
                    continue;
                }

                $this->tick();
                call_user_func_array($callback, array_slice($data, 0, self::GEONAME_COLUMNS));
                $this->memory();
            }
        } finally {
            fclose($handle);
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Dependencies:
http://download.geonames.org/export/dump/allCountries.zip
http://download.geonames.org/export/dump/alternateNames.zip
http://download.geonames.org/export/dump/countryInfo.txt