Skip to content

Instantly share code, notes, and snippets.

@len0rd
Last active July 12, 2022 17:29
Show Gist options
  • Save len0rd/18f024962f870fb4bc363cba24d41ab9 to your computer and use it in GitHub Desktop.
Save len0rd/18f024962f870fb4bc363cba24d41ab9 to your computer and use it in GitHub Desktop.
Pretty county KML data from the US census and split it into separate files for each state
import groovy.xml.*
/**
* Removes some additional metadata I don't want and splits the
* US county KML file into separate files for each state
* Get the census data from:
* https://www.census.gov/geo/maps-data/data/kml/kml_counties.html
* The higher the ratio, the lower the resolution
*
* This data can be easily imported into a Google MyMap
*
* @author len0rd
* @since 2018-08-02
*/
// Source KML (all US counties) and the directory that receives the per-state files.
def inFile = 'allUS.kml'
def outFolder = 'split/'
// Both constructor flags are false (non-validating, not namespace-aware); without
// namespace awareness the serialized output does not sprout odd prefixed tags.
def slurper = new XmlSlurper(false, false)
def kml = slurper.parse(inFile)
println 'parsed!'
// Skeleton document used as the starting point for every per-state output file.
// Its Schema declares only the five extended-data fields that are kept
// (STATEFP, COUNTYFP, COUNTYNS, AFFGEOID, NAME); the Folder is left empty so the
// county Placemark nodes can be appended to it later in the script.
def baseSplitKml = """<kml
xmlns:gx="http://www.google.com/kml/ext/2.2"
xmlns:atom="http://www.w3.org/2005/Atom"
xmlns="http://www.opengis.net/kml/2.2">
<Document>
<name>cb_2017_us_county_500k</name>
<visibility>1</visibility>
<Schema name="cb_2017_us_county_500k" id="kml_schema_ft_cb_2017_us_county_500k">
<SimpleField type="xsd:string" name="STATEFP">
<displayName>STATEFP</displayName>
</SimpleField>
<SimpleField type="xsd:string" name="COUNTYFP">
<displayName>COUNTYFP</displayName>
</SimpleField>
<SimpleField type="xsd:string" name="COUNTYNS">
<displayName>COUNTYNS</displayName>
</SimpleField>
<SimpleField type="xsd:string" name="AFFGEOID">
<displayName>AFFGEOID</displayName>
</SimpleField>
<SimpleField type="xsd:string" name="NAME">
<displayName>NAME</displayName>
</SimpleField>
</Schema>
<Folder id="kml_ft_cb_2017_us_county_500k">
</Folder>
</Document>
</kml>"""
// Two-digit state FIPS code -> name used in the output file name.
// Besides the 50 states and DC this also lists the territories that appear in the
// Census county file (American Samoa, Guam, Northern Mariana Islands, Puerto Rico,
// US Virgin Islands); without them their counties resolve to a null name and all
// end up lumped together in a single "stCounties-null.kml".
def stateFIPToName = [
    '01': 'Alabama', '02': 'Alaska', '04': 'Arizona', '05': 'Arkansas',
    '06': 'California', '08': 'Colorado', '09': 'Connecticut', '10': 'Delaware',
    '11': 'District of Columbia', '12': 'Florida', '13': 'Georgia', '15': 'Hawaii',
    '16': 'Idaho', '17': 'Illinois', '18': 'Indiana', '19': 'Iowa',
    '20': 'Kansas', '21': 'Kentucky', '22': 'Louisiana', '23': 'Maine',
    '24': 'Maryland', '25': 'Massachusetts', '26': 'Michigan', '27': 'Minnesota',
    '28': 'Mississippi', '29': 'Missouri', '30': 'Montana', '31': 'Nebraska',
    '32': 'Nevada', '33': 'New Hampshire', '34': 'New Jersey', '35': 'New Mexico',
    '36': 'New York', '37': 'North Carolina', '38': 'North Dakota', '39': 'Ohio',
    '40': 'Oklahoma', '41': 'Oregon', '42': 'Pennsylvania', '44': 'Rhode Island',
    '45': 'South Carolina', '46': 'South Dakota', '47': 'Tennessee', '48': 'Texas',
    '49': 'Utah', '50': 'Vermont', '51': 'Virginia', '53': 'Washington',
    '54': 'West Virginia', '55': 'Wisconsin', '56': 'Wyoming',
    // island areas and Puerto Rico
    '60': 'American Samoa', '66': 'Guam', '69': 'Northern Mariana Islands',
    '72': 'Puerto Rico', '78': 'US Virgin Islands'
]
// Make sure the destination directory is there before any split file is written.
outFolderFile = new File(outFolder)
def outputDirMissing = !outFolderFile.exists()
if (outputDirMissing) {
    println 'create the folder for output'
    outFolderFile.mkdirs()
}
// Styling is irrelevant for the split output, so every Style node is dropped.
kml.Document.Style.replaceNode {}
// Remove the schema column declarations of the extended-data fields we do not keep.
// The field name lives in the "name" attribute, read with GPath's `.@name` syntax.
def unwantedSchemaFields = ['LSAD', 'ALAND', 'AWATER', 'GEOID']
kml.Document.Schema.SimpleField.findAll { field ->
    field.@name.text() in unwantedSchemaFields
}.each { field ->
    field.replaceNode {}
}
// Running totals, reported once the whole input has been walked.
def iter = 0        // placemarks (counties) seen
def polyCount = 0   // placemarks whose geometry is a single Polygon
def multiCount = 0  // placemarks whose geometry is a MultiGeometry
// Once the loop below has finished these hold the one split document that has not
// been written yet and its destination; the code after this section saves it.
def currentSplitKml = null
def currentSplitKmlFile = null
// Extended-data fields stripped from every placemark.
def unwantedFields = ['LSAD', 'ALAND', 'AWATER', 'GEOID']
// One in-memory split document per output file, keyed by file path (insertion order
// is kept). Grouping here instead of probing the disk with File.exists() means
//  * leftovers from a previous run in the output folder can no longer trigger an
//    append to a document that has not been created yet, and
//  * counties do not have to arrive sorted by state to land in the right file.
def splitsByPath = [:]

kml.Document.Folder.Placemark.each { placemark ->
    placemark.description.replaceNode {}   // remove description node
    placemark.styleUrl.replaceNode {}      // remove styleUrl node
    // remove extended data we don't want
    placemark.ExtendedData.SchemaData.SimpleData.findAll { field ->
        field.@name.text() in unwantedFields
    }.each { dead ->
        dead.replaceNode {}
    }

    // Strip the rendering hints from the geometry, whichever form it takes.
    if (placemark.Polygon.size() == 1) {
        placemark.Polygon.extrude.replaceNode {}
        placemark.Polygon.tessellate.replaceNode {}
        placemark.Polygon.altitudeMode.replaceNode {}
        polyCount++
    } else if (placemark.MultiGeometry.size() == 1) {
        placemark.MultiGeometry.Polygon.each { polygon ->
            polygon.extrude.replaceNode {}
            polygon.tessellate.replaceNode {}
            polygon.altitudeMode.replaceNode {}
        }
        multiCount++
    }
    iter++

    // see what state this county is in
    def stateFip = placemark.ExtendedData.SchemaData.SimpleData.find { field ->
        field.@name.text() == 'STATEFP'
    }?.text()
    if (!stateFip) {
        println "skipping a placemark without a STATEFP value"
        return  // acts as 'continue' inside each{}
    }

    // Unknown codes get a recognisable name instead of the literal "null".
    def stateName = stateFIPToName[stateFip] ?: ("FIPS-" + stateFip)
    def stateFipFile = new File(outFolder + "stCounties-" + stateName + ".kml")

    def split = splitsByPath[stateFipFile.path]
    if (split == null) {
        println "there is no file for state " + stateFip + " creating one"
        split = [kml: new XmlSlurper(false, false).parseText(baseSplitKml), file: stateFipFile]
        splitsByPath[stateFipFile.path] = split
    }
    // add the current county to the split document of its state
    split.kml.Document.Folder.appendNode(placemark)
    currentSplitKml = split.kml
    currentSplitKmlFile = split.file
}

// Write every split document except the one referenced by currentSplitKml /
// currentSplitKmlFile: that last one is saved by the code following this section.
splitsByPath.each { path, split ->
    if (path != currentSplitKmlFile.path) {
        println "writing current kml split out to file:: " + split.file
        split.file.withWriter('utf-8') { writer ->
            writer.writeLine XmlUtil.serialize(split.kml)
        }
    }
}
// Final report: totals collected while walking the placemarks.
def banner = "================================================="
println banner
println "there are ${iter} counties in this file"
println "${polyCount} polys, ${multiCount} multi-polys"
println banner
println "saving last split file"
// The split document still held in memory has not been written yet; flush it now.
if (currentSplitKml != null) {
    currentSplitKmlFile.withWriter('utf-8') { out ->
        out.writeLine(XmlUtil.serialize(currentSplitKml))
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment