Last active
February 3, 2017 23:20
-
-
Save twkm/644c1c528061d5ddebedd591e0bf4681 to your computer and use it in GitHub Desktop.
AWK scripts to process GeoLite2 Database
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Get Countries | |
gawk ' | |
BEGIN { | |
FS="\"*,\"*" | |
OFS="\",\"" | |
print "\"" "id", "name", "slug" "\"" | |
} | |
NR>1 { | |
slug = tolower($2) | |
slug_pass_one = gsub(/[ ,_.]/, "-", slug) | |
if (!($1 in country_id)) { | |
country_id[$1]++ | |
} | |
if (!($2 in country_name)) { | |
country_name[$1] = $2 | |
} | |
if (!(slug in country_slug)) { | |
country_slug[$1] = slug | |
} | |
# id, name, slug | |
#print "\"" countries[$6], $6, regions[region], region, cities_count, $11, $13 "\"" | |
} | |
END { | |
for (i in country_id) { | |
print i, country_name[i], country_slug[i] "\"" | |
} | |
} | |
' main_export.csv | |
## Get Regions | |
gawk ' | |
BEGIN { | |
FS="\"+,\"+" | |
OFS="\",\"" | |
print "\"" "id", "country_id", "name", "slug" "\"" | |
} | |
NR>1 { | |
slug = tolower($4) | |
slug_pass_one = gsub(/[ ,_.]/, "-", slug) | |
country_id = $1 | |
gsub(/"/, "", country_id) | |
country_ids[$3] = country_id | |
if (!($3 in ids)) { | |
ids[$3]++ | |
} | |
if (!($4 in names)) { | |
names[$3] = $4 | |
} | |
if (!(slug in slugs)) { | |
slugs[$3] = slug | |
} | |
# id, name, slug | |
#print "\"" countries[$6], $6, regions[region], region, cities_count, $11, $13 "\"" | |
} | |
END { | |
for (i in ids) { | |
print "\"" i, country_ids[i], names[i], slugs[i] "\"" | |
} | |
} | |
' main_export.csv | |
## Get Cities | |
gawk ' | |
BEGIN { | |
FS="\"+,\"+" | |
OFS="\",\"" | |
print "\"" "id", "region_id", "name", "slug", "timezone" "\"" | |
} | |
NR>1 { | |
slug = tolower($6) | |
slug_pass_one = gsub(/[ ,_.]/, "-", slug) | |
if (slug in city_slugs) { | |
i = 2 | |
newslug = slug "-" i | |
while (newslug in city_slugs) { | |
i++ | |
newslug = slug "-" i | |
} | |
slug = newslug | |
} | |
city_slugs[slug] = slug | |
# id, name, slug | |
print "\"" $5, $3, $6, slug, $7 "\"" | |
} | |
END { | |
} | |
' main_export.csv | |
## Insert countries | |
gawk '''BEGIN { | |
FS="\"?,\"?" | |
OFS="" | |
print "INSERT INTO `cities` (`id`, `region_id`, `name`, `slug`, `timezone`, `created`, `modified`) VALUES" | |
} | |
NR>1 { | |
line_ending = "," | |
gsub(/"/, "", $1) | |
gsub(/'\''/, "\\'\''", $3) | |
gsub(/'\''/, "", $4) | |
gsub(/"/, "", $5) | |
print "('\''" $1 "'\'', '\''" $2 "'\'', '\''" $3 "'\'', '\''" $4 "'\'', '\''" $5 "'\'', now(), now())" line_ending | |
} | |
END { | |
} | |
''' cities_export2.csv | |
## Insert regions | |
gawk '''BEGIN { | |
FS="\"?,\"?" | |
OFS="" | |
print "INSERT INTO `regions` (`id`, `country_id`, `name`, `slug`, `created`, `modified`) VALUES" | |
} | |
NR>1 { | |
line_ending = "," | |
gsub(/"/, "", $1) | |
gsub(/'\''/, "\\'\''", $3) | |
gsub(/"/, "", $4) | |
gsub(/'\''/, "", $4) | |
print "('\''" $1 "'\'', '\''" $2 "'\'', '\''" $3 "'\'', '\''" $4 "'\'', now(), now())" line_ending | |
} | |
END { | |
} | |
''' regions_export2.csv | |
## Insert cities | |
gawk '''BEGIN { | |
FS="\"?,\"?" | |
OFS="" | |
print "INSERT INTO `cities` (`id`, `region_id`, `name`, `slug`, `timezone`, `created`, `modified`) VALUES" | |
} | |
NR>1 { | |
line_ending = "," | |
gsub(/"/, "", $1) | |
gsub(/'\''/, "\\'\''", $3) | |
gsub(/'\''/, "", $4) | |
gsub(/"/, "", $5) | |
print "('\''" $1 "'\'', '\''" $2 "'\'', '\''" $3 "'\'', '\''" $4 "'\'', '\''" $5 "'\'', now(), now())" line_ending | |
} | |
END { | |
} | |
''' cities_export2.csv |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment