Last active
July 12, 2022 17:29
-
-
Save len0rd/18f024962f870fb4bc363cba24d41ab9 to your computer and use it in GitHub Desktop.
Prettify county KML data from the US Census and split it into separate files for each state
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import groovy.xml.* | |
/**
 * Removes some additional metadata I don't want and splits the
 * US county KML file into separate files, one for each state.
 * Get the census data from:
 * https://www.census.gov/geo/maps-data/data/kml/kml_counties.html
 * The higher the ratio, the lower the resolution.
 *
 * This data can be easily imported into Google My Maps.
 *
 * @author len0rd
 * @since 2018-08-02
 */
// Source KML holding every US county, and the directory that receives the per-state files.
def inFile = "allUS.kml"
def outFolder = 'split/'
// Both constructor flags are false: no validation and no namespace awareness.
// Without namespace awareness the slurper does not print out weird tags.
def kmlParser = new XmlSlurper(false, false)
def kml = kmlParser.parse(inFile)
println "parsed!"
// Skeleton document for one per-state KML file: the schema keeps only the five
// columns that survive the cleanup, and the empty Folder receives the state's
// county Placemarks. A non-interpolating ''' literal is used because the
// template contains no placeholders.
def baseSplitKml = '''<kml
xmlns:gx="http://www.google.com/kml/ext/2.2"
xmlns:atom="http://www.w3.org/2005/Atom"
xmlns="http://www.opengis.net/kml/2.2">
<Document>
<name>cb_2017_us_county_500k</name>
<visibility>1</visibility>
<Schema name="cb_2017_us_county_500k" id="kml_schema_ft_cb_2017_us_county_500k">
<SimpleField type="xsd:string" name="STATEFP">
<displayName>STATEFP</displayName>
</SimpleField>
<SimpleField type="xsd:string" name="COUNTYFP">
<displayName>COUNTYFP</displayName>
</SimpleField>
<SimpleField type="xsd:string" name="COUNTYNS">
<displayName>COUNTYNS</displayName>
</SimpleField>
<SimpleField type="xsd:string" name="AFFGEOID">
<displayName>AFFGEOID</displayName>
</SimpleField>
<SimpleField type="xsd:string" name="NAME">
<displayName>NAME</displayName>
</SimpleField>
</Schema>
<Folder id="kml_ft_cb_2017_us_county_500k">
</Folder>
</Document>
</kml>'''
// Two-digit state FIPS code -> human-readable name used in the output file name.
// Covers the 50 states and DC, plus Puerto Rico and the island areas (60, 66, 69,
// 72, 78) so that a lookup for those codes does not come back null.
def stateFIPToName = [
    '01': 'Alabama',
    '02': 'Alaska',
    '04': 'Arizona',
    '05': 'Arkansas',
    '06': 'California',
    '08': 'Colorado',
    '09': 'Connecticut',
    '10': 'Delaware',
    '11': 'District of Columbia',
    '12': 'Florida',
    '13': 'Georgia',
    '15': 'Hawaii',
    '16': 'Idaho',
    '17': 'Illinois',
    '18': 'Indiana',
    '19': 'Iowa',
    '20': 'Kansas',
    '21': 'Kentucky',
    '22': 'Louisiana',
    '23': 'Maine',
    '24': 'Maryland',
    '25': 'Massachusetts',
    '26': 'Michigan',
    '27': 'Minnesota',
    '28': 'Mississippi',
    '29': 'Missouri',
    '30': 'Montana',
    '31': 'Nebraska',
    '32': 'Nevada',
    '33': 'New Hampshire',
    '34': 'New Jersey',
    '35': 'New Mexico',
    '36': 'New York',
    '37': 'North Carolina',
    '38': 'North Dakota',
    '39': 'Ohio',
    '40': 'Oklahoma',
    '41': 'Oregon',
    '42': 'Pennsylvania',
    '44': 'Rhode Island',
    '45': 'South Carolina',
    '46': 'South Dakota',
    '47': 'Tennessee',
    '48': 'Texas',
    '49': 'Utah',
    '50': 'Vermont',
    '51': 'Virginia',
    '53': 'Washington',
    '54': 'West Virginia',
    '55': 'Wisconsin',
    '56': 'Wyoming',
    '60': 'American Samoa',
    '66': 'Guam',
    '69': 'Northern Mariana Islands',
    '72': 'Puerto Rico',
    '78': 'U.S. Virgin Islands'
]
// Make sure the destination directory is present before any split file is written.
outFolderFile = new File(outFolder)
def outputFolderMissing = !outFolderFile.exists()
if (outputFolderMissing) {
    println "create the folder for output"
    outFolderFile.mkdirs()
}
// Styling is irrelevant for the split files, so drop every Style node.
kml.Document.Style.replaceNode {}
// Schema columns (SimpleField headers) that should not appear in the output.
def unwantedSchemaFields = ['LSAD', 'ALAND', 'AWATER', 'GEOID']
// Remove the header definition of each unwanted column; the match is on the
// SimpleField's "name" attribute.
kml.Document.Schema.SimpleField.findAll { field ->
    field.@name.text() in unwantedSchemaFields
}.each { field ->
    field.replaceNode {}
}
// Counters reported in the summary: placemarks seen, and how many carried a
// single Polygon versus a MultiGeometry.
def iter = 0
def polyCount = 0
def multiCount = 0
// One in-memory split document per state FIPS code, in first-seen order.
// Grouping here (instead of testing whether the output file already exists)
// means the input does not have to be sorted by state, and stale files left
// over from a previous run cannot be mistaken for "the state being built".
def splitKmlByStateFip = [:]
// Per-county extended-data columns that should not appear in the output.
def unwantedDataFields = ['LSAD', 'ALAND', 'AWATER', 'GEOID']
// Drops the presentation-only children of a Polygon (or of a set of Polygons).
def stripPolygonExtras = { polygon ->
    polygon.extrude.replaceNode {}
    polygon.tessellate.replaceNode {}
    polygon.altitudeMode.replaceNode {}
}
kml.Document.Folder.Placemark.each { placemark ->
    placemark.description.replaceNode {} // remove description node
    placemark.styleUrl.replaceNode {}    // remove styleUrl node
    // remove extended data we don't want, matched on the "name" attribute
    placemark.ExtendedData.SchemaData.SimpleData.findAll { data ->
        data.@name.text() in unwantedDataFields
    }.each { dead ->
        dead.replaceNode {}
    }
    if (placemark.Polygon.size() == 1) {
        stripPolygonExtras(placemark.Polygon)
        polyCount++
    } else if (placemark.MultiGeometry.size() == 1) {
        placemark.MultiGeometry.Polygon.each { polygon ->
            stripPolygonExtras(polygon)
        }
        multiCount++
    }
    iter++
    // see what state this county is in and file it under that state's document
    placemark.ExtendedData.SchemaData.SimpleData.findAll { data ->
        data.@name.text() == 'STATEFP'
    }.each { st ->
        def stateFip = st.text()
        if (!splitKmlByStateFip.containsKey(stateFip)) {
            println "there is no file for state " + stateFip + " creating one"
            splitKmlByStateFip[stateFip] = new XmlSlurper(false, false).parseText(baseSplitKml)
        }
        splitKmlByStateFip[stateFip].Document.Folder.appendNode(placemark)
    }
}
println "================================================="
println "there are " + iter + " counties in this file"
println polyCount + " polys, " + multiCount + " multi-polys"
println "================================================="
println "saving split files"
splitKmlByStateFip.each { stateFip, splitKml ->
    // Codes missing from the lookup table get a FIPS-based name instead of "null".
    def stateName = stateFIPToName[stateFip] ?: ('FIPS-' + stateFip)
    def splitKmlFile = new File(outFolder, "stCounties-" + stateName + ".kml")
    println "writing current kml split out to file:: " + splitKmlFile
    // XmlUtil.serialize emits the XML declaration itself, so no header is pre-written.
    splitKmlFile.withWriter('utf-8') { writer ->
        writer.writeLine XmlUtil.serialize(splitKml)
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment