Last active
May 4, 2021 14:12
-
-
Save KhepryQuixote/3704e1a727cf9913a41c to your computer and use it in GitHub Desktop.
Python scripts to download, extract, and output oil, gas, and/or fracking well information from various states in the United States of America.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
''' | |
This Python script is oriented towards oil, gas, and fracking well | |
information download, extract, and output at the state-level within | |
the United States of America. Not every state is covered, because some
states either do not allow fracking or do not offer any data for download.
This Python script is dependent upon another Python script entitled | |
Src2Tgt.py, which is also available as a Gist under the same name. | |
''' | |
import ftplib | |
import os | |
import socket | |
import urllib | |
import requests | |
from Src2Tgt import * | |
from urllib.request import urlretrieve | |
# global variables
# Run-wide settings; main() forwards all four of them to process_files().
downloadFiles = False  # when True, each source file is fetched before it is processed
unpackFiles = True  # when True, the Src2Tgt conversion step is run on each file
maxRecords = 0  # handed to the Src2Tgt converters; presumably 0 means "no limit" -- confirm in Src2Tgt
flushCount = 1000  # handed to the Src2Tgt converters; presumably the progress-report interval in rows -- confirm in Src2Tgt
# ============================================================================= | |
# Main routine: | |
# ============================================================================= | |
def main():
    """Process the well data of every selected state.

    Each two-letter code in ``states`` that is a key of ``listStates`` is
    announced on stdout and handed to process_files() together with the
    module-level ``downloadFiles``, ``unpackFiles``, ``maxRecords`` and
    ``flushCount`` settings.  A code that is not in ``listStates`` is
    reported and skipped instead of being ignored silently.

    Returns:
        None.
    """
    # 'OH' and 'PA' are still thorns in my side at the moment, so they are
    # deliberately NOT part of the list below:
    # NOTE: 'OH' fails because of the FTP site
    #       is either not active or is not allowing connections
    # NOTE: 'PA' fails because of a 302 redirect
    #       Look under your Chrome "Python" bookmarks
    #       for sample code as to how to handle redirects
    #       This was somewhat solved using "requests" instead of "urllib",
    #       however, now the website wants the program to "agree" by pressing
    #       a button on the redirect screen
    #       Can I programmatically "submit" an "agree"?
    #       <input type="submit" name="ctl00$MainContent$AgreeButton" value="Agree" id="MainContent_AgreeButton" />
    #       I've done it before via C# and Java, but what the hell, why not in Python, eh?
    states = ['NY']  # this or these states are the ones I wish to download

    mainPath = "/home/temp/fracking/data"
    downloadPath = os.path.join(mainPath, "download")
    unpackedPath = os.path.join(mainPath, "unpacked")

    # every state code that process_files() knows how to handle
    listStates = {
        'AK': 'Alaska',
        'AL': 'Alabama',
        'AR': 'Arkansas',
        'CA': 'California',
        'CO': 'Colorado',
        'FL': 'Florida',
        'ID': 'Idaho',
        'KS': 'Kansas',
        'LA': 'Louisiana',
        'MI': 'Michigan',
        'MO': 'Missouri',
        'MT': 'Montana',
        'ND': 'North Dakota',
        'NE': 'Nebraska',
        'NY': 'New York',
        'OH': 'Ohio',
        'OK': 'Oklahoma',
        'OR': 'Oregon',
        'PA': 'Pennsylvania',
        'UT': 'Utah',
        'VA': 'Virginia',
        'WA': 'Washington',
        'WV': 'West Virginia',
    }

    for state in states:
        if state not in listStates:
            print('WARNING: unknown state code "%s" skipped' % state)
            continue
        print('--------------------')
        print('%s (%s) process' % (listStates[state], state))
        process_files(downloadPath, unpackedPath, state,
                      downloadFiles, unpackFiles, maxRecords, flushCount)
# ============================================================================= | |
# Well information downloads and processing, state-by-state: | |
# ============================================================================= | |
def process_files(dwnPath, unpPath, state, downloadFiles, unpackFiles, maxRecords, flushCount):
    """Download and/or convert the well data files of a single state.

    Parameters:
        dwnPath: root download folder; files are stored in ``<dwnPath>/<state>``.
        unpPath: root output folder; results are written to ``<unpPath>/<state>``.
        state: two-letter state code that selects the sources to handle.
        downloadFiles: when true, every source file is fetched first.
        unpackFiles: when true, every file is run through its Src2Tgt
            converter (or copied as-is for reference documents).
        maxRecords: passed through unchanged to the Src2Tgt converters.
        flushCount: passed through unchanged to the Src2Tgt converters.

    Both per-state folders are created when missing.  Files are handled
    strictly one after the other: download first, then conversion.  A state
    code without an entry below only produces a warning.

    Returns:
        None.
    """
    # Imported here explicitly: the module itself never imports shutil and
    # only got the name by accident through "from Src2Tgt import *".
    import shutil

    unzPath = ""
    dwnPath = os.path.join(dwnPath, state)
    unpPath = os.path.join(unpPath, state)
    os.makedirs(dwnPath, exist_ok=True)
    os.makedirs(unpPath, exist_ok=True)
    src2tgt = Src2Tgt()

    def fetch(url, tgtFile, downloader=urllib_download):
        """Download url as tgtFile when downloads are enabled; return the local path.

        A url of None means the file is obtained by other means (e.g. FTP).
        """
        if downloadFiles and url is not None:
            downloader(url, dwnPath, tgtFile)
        return os.path.join(dwnPath, tgtFile)

    def csv_name(tgtFile, ext='.zip'):
        """Derive the CSV file name: lower-case tgtFile with ext swapped for '.csv'."""
        return tgtFile.lower().replace(ext, '.csv')

    def unzip(method, url, tgtFile, csvFile=None, *extra):
        """Fetch a ZIP file, then convert it with the named Src2Tgt zip2...2csv method.

        The method is looked up by name only when conversion is enabled, so a
        converter that does not exist yet cannot break a download-only run.
        An empty csvFile forces the CSV file to the same name as each
        unzipped file; None derives the name from tgtFile.
        """
        zipFile = fetch(url, tgtFile)
        if unpackFiles:
            if csvFile is None:
                csvFile = csv_name(tgtFile)
            getattr(src2tgt, method)(zipFile, unpPath, csvFile,
                                     maxRecords, flushCount, unzPath, *extra)

    def mdb(url, tgtFile, table):
        """Fetch a zipped MDB database, then export one of its tables to CSV."""
        zipFile = fetch(url, tgtFile)
        if unpackFiles:
            src2tgt.zip2mdb2csv(zipFile, unpPath, csv_name(tgtFile), table,
                                maxRecords, flushCount, unzPath)

    def sheet(url, tgtFile, ext):
        """Fetch a plain (unzipped) spreadsheet, then convert it to CSV."""
        xlsFile = fetch(url, tgtFile)
        if unpackFiles:
            src2tgt.xls2csv(xlsFile, unpPath, csv_name(tgtFile, ext),
                            maxRecords, flushCount)

    def copy_as_is(url, tgtFile):
        """Fetch a reference document, then copy it unchanged to the output folder."""
        srcFile = fetch(url, tgtFile)
        if unpackFiles:
            shutil.copy2(srcFile, os.path.join(unpPath, tgtFile))

    # -------------------------------------------------------------------------
    # Alaska
    # -------------------------------------------------------------------------
    if state == 'AK':
        mdb("http://aogweb.state.ak.us/Data_Extract/AOGCC_DataExtract.zip",
            "AOGCC_DataExtract.zip", 'tblWellMaster')

    # -------------------------------------------------------------------------
    # Alabama
    # -------------------------------------------------------------------------
    elif state == 'AL':
        baseUrl = "http://www.ogb.state.al.us/ogb/ogb_gisdata/"
        for tgtFile in ("wells.zip", "bhl_loc.zip", "bhl_lines.zip"):
            unzip('zip2shp2csv', baseUrl + tgtFile, tgtFile)

    # -------------------------------------------------------------------------
    # Arkansas
    # -------------------------------------------------------------------------
    elif state == 'AR':
        # TODO: need to process these as kml2csv
        baseUrl = "http://aogc2.state.ar.us/GIS_GOOGLE/"
        for tgtFile in ("Natural_Gas_and_Oil_Wells.zip",
                        "Drilling_Fluid_Disposal_Sites.zip"):
            fetch(baseUrl + tgtFile, tgtFile)

    # -------------------------------------------------------------------------
    # California
    # -------------------------------------------------------------------------
    elif state == 'CA':
        # both archives are called AllWells.zip on the server, hence the
        # distinct local names
        unzip('zip2xls2csv',
              "ftp://ftp.consrv.ca.gov/pub/oil/GIS/ExcelTables/AllWells.zip",
              "AllWells_Excel.zip", "AllWells_Excel.csv")
        unzip('zip2shp2csv',
              "ftp://ftp.consrv.ca.gov/pub/oil/GIS/Shapefiles/AllWells.zip",
              "AllWells_Shape.zip", "AllWells_Shape.csv")

    # -------------------------------------------------------------------------
    # Colorado
    # -------------------------------------------------------------------------
    elif state == 'CO':
        baseUrl = "http://cogcc.state.co.us/Downloads/"
        for tgtFile in ("WELL_SHP.ZIP", "DIR_BHLS.ZIP", "DIRLINES.ZIP"):
            unzip('zip2shp2csv', baseUrl + tgtFile, tgtFile)

    # -------------------------------------------------------------------------
    # Florida
    # -------------------------------------------------------------------------
    elif state == 'FL':
        baseUrl = "http://www.dep.state.fl.us/water/mines/oil_gas/docs/"
        sheet(baseUrl + "og-permit-database.xlsx", "og-permit-database.xlsx", '.xlsx')
        sheet(baseUrl + "pre_permit_oil_gas_wells.xls", "pre_permit_oil_gas_wells.xls", '.xls')

    # -------------------------------------------------------------------------
    # Idaho
    # -------------------------------------------------------------------------
    elif state == 'ID':
        # TODO: will need to adjust the lat/longs in that they're in
        #       the Idaho Transverse Mercator (IDTM) projection.
        unzip('zip2shp2csv',
              "http://www.idwr.idaho.gov/ftp/gisdata/Spatial/Wells/WellConstruction/wells.zip",
              "AllPermittedWells.zip")
        unzip('zip2shp2csv',
              "http://www.idwr.idaho.gov/ftp/gisdata/Spatial/Wells/UndergroundInjectionControl/InjectionWells.zip",
              "InjectionWells.zip")

    # -------------------------------------------------------------------------
    # Kansas
    # -------------------------------------------------------------------------
    elif state == 'KS':
        baseUrl = "http://www.kgs.ku.edu/PRS/Ora_Archive/"
        # empty string forces CSV file to same name as each unzipped file
        unzip('zip2txt2csv', baseUrl + "ks_wells.zip", "ks_wells.zip", "")
        unzip('zip2shp2csv', baseUrl + "OILGAS_WELLS_GEO27.zip", "OILGAS_WELLS_GEO27.zip")

    # -------------------------------------------------------------------------
    # Louisiana
    # -------------------------------------------------------------------------
    elif state == 'LA':
        baseUrl = "http://sonris-www.dnr.state.la.us/gis/agsweb/arcgisserver/arcgisoutput/extData/shp/"
        for tgtFile in ("wells.zip", "BOTTOM_HOLE.zip",
                        "BOTTOM_HOLE_LINE.zip", "WATER_WELL_REG.zip"):
            unzip('zip2shp2csv', baseUrl + tgtFile, tgtFile)

    # -------------------------------------------------------------------------
    # Michigan
    # -------------------------------------------------------------------------
    elif state == 'MI':
        tgtFile = "ogs-oilandgas-well-locations.zip"
        if downloadFiles:
            # this archive is only offered through a login-protected FTP site
            ftp_download("ftp.deq.state.mi.us", "GeoWebFace", "Geology(1)",
                         "geowebface/DownLoads", "ogs-oilandgas-well-locations.zip",
                         dwnPath, tgtFile)
        mdb(None, tgtFile, 'well locations')
        mdb("http://www.michigan.gov/documents/deq/ogs-oilandgas-injection-data_278433_7.zip",
            "ogs-oilandgas-injection-data_278433_7.zip", 'Injection Wells Data')

    # -------------------------------------------------------------------------
    # Missouri
    # -------------------------------------------------------------------------
    elif state == 'MO':
        sheet("http://www.dnr.mo.gov/geology/geosrv/docs/permits6-5-2014.xls",
              "permits-6-5-2014.xls", '.xls')

    # -------------------------------------------------------------------------
    # Montana
    # -------------------------------------------------------------------------
    elif state == 'MT':
        # TODO: will have to figure out how to obtain these automatically!
        baseUrl = "http://www.bogc.dnrc.mt.gov/WebApps/DataMiner/Wells/"
        for tgtFile in ("WellSurfaceLongLat.aspx", "WellBLSLongLat.aspx"):
            fetch(baseUrl + tgtFile, tgtFile)

    # -------------------------------------------------------------------------
    # North Dakota
    # -------------------------------------------------------------------------
    elif state == 'ND':
        unzip('zip2shp2csv', "https://www.dmr.nd.gov/output/ShapeFiles/Wells.zip", "Wells.zip")

    # -------------------------------------------------------------------------
    # Nebraska
    # -------------------------------------------------------------------------
    elif state == 'NE':
        baseUrl = "http://www.nogcc.ne.gov/Publications/"
        mdb(baseUrl + "NebraskaWellData.zip", "NebraskaWellData.zip", 'tblNebraskaWellData')
        # TODO: might be nice to tweak zip2shp2csv to compress multiple spaces down to one space
        unzip('zip2shp2csv', baseUrl + "NE_WELLS.zip", "NE_WELLS.zip")
        copy_as_is(baseUrl + "NebraskaWellStatus_TypeCodes.pdf", "NebraskaWellStatus_TypeCodes.pdf")

    # -------------------------------------------------------------------------
    # New York
    # -------------------------------------------------------------------------
    elif state == 'NY':
        unzip('zip2csv2csv', "ftp://ftp.dec.state.ny.us/dmn/zip/wellDOS.zip", "wellDOS.zip", "")

    # -------------------------------------------------------------------------
    # Ohio
    # -------------------------------------------------------------------------
    elif state == 'OH':
        # TODO: This FTP site appears to be no longer available
        if downloadFiles:
            for tgtFile in ("Setup.exe", "Rdbmsd97.exe"):
                ftp_download("ftp.dnr.state.oh.us", "", "", "OilGas/Upload/RBDMS",
                             tgtFile, dwnPath, tgtFile)
        # nothing to unpack yet

    # -------------------------------------------------------------------------
    # Oklahoma
    # -------------------------------------------------------------------------
    elif state == 'OK':
        baseUrl = "ftp://ftp.occ.state.ok.us/OG_DATA/"
        # empty string forces CSV file to same name as each unzipped file
        unzip('zip2txt2csv', baseUrl + "W27BASE.ZIP", "W27BASE.ZIP", "", '|', ',')
        unzip('zip2txt2csv', baseUrl + "W97OPER.ZIP", "W97OPER.ZIP", "", '|', ',')
        copy_as_is(baseUrl + "CONTENTS.XLS", "CONTENTS.XLS")

    # -------------------------------------------------------------------------
    # Oregon
    # -------------------------------------------------------------------------
    elif state == 'OR':
        sheet("http://www.oregongeology.org/mlrr/spreadsheets/OG_Permits_06-25-2013.xlsx",
              "OG_Permits_06-25-2013.xlsx", '.xlsx')

    # -------------------------------------------------------------------------
    # Pennsylvania
    # -------------------------------------------------------------------------
    # TODO: This may have to be done manually until
    #       I can figure out how to "submit"
    #       an "agree" button to a redirected form
    elif state == 'PA':
        baseUrl = "https://www.paoilandgasreporting.state.pa.us/publicreports/Modules/DataExports/ExportProductionData.aspx?PERIOD_ID="
        fetch(baseUrl + "2013-0", "Conventional_Wells_2013_01-12.csv", requests_download)
        fetch(baseUrl + "2013-1", "Unconventional_Wells_2013_01-06.csv", requests_download)
        fetch(baseUrl + "2013-2", "Unconventional_Wells_2013_07-12.csv", requests_download)
        # the downloads already are CSV files: nothing to unpack

    # -------------------------------------------------------------------------
    # Utah
    # -------------------------------------------------------------------------
    elif state == 'UT':
        # TODO: will have to come up with an exe2dbf2csv routine
        #       in order to process this state's data file(s)
        fetch("https://oilgas.ogm.utah.gov/pub/Database/welldata.exe", "welldata.exe")

    # -------------------------------------------------------------------------
    # Virginia
    # -------------------------------------------------------------------------
    elif state == 'VA':
        tgtFile = "DgoWellLocation.pdf"
        pdfFile = fetch("http://www.dmme.virginia.gov/dgoinquiry/frmPrint.aspx?Form=DgoWellLocation",
                        tgtFile)
        if unpackFiles:
            # TODO: will need to write a pdf2csv routine
            src2tgt.pdf2csv(pdfFile, unpPath, csv_name(tgtFile, '.pdf'), maxRecords, flushCount)

    # -------------------------------------------------------------------------
    # Washington
    # -------------------------------------------------------------------------
    elif state == 'WA':
        # TODO: this one appears to be a total ass
        #       to work with, maybe ogr2ogr can handle it
        # TODO: come up with a gdb2csv routine
        fetch("http://www.dnr.wa.gov/Publications/ger_portal_oil_gas_wells.zip",
              "ger_portal_oil_gas_wells.zip")

    # -------------------------------------------------------------------------
    # West Virginia
    # -------------------------------------------------------------------------
    elif state == 'WV':
        # TODO: first column is missing column name
        #       visual inspection reveals that the values
        #       are simply the row number and can be safely
        #       overlooked for the time being
        baseUrl = "http://www.dep.wv.gov/oil-and-gas/databaseinfo/Documents/"
        for tgtFile in ("WellLocationPart1(10-31-2013).zip",
                        "WellLocationPart2(10-31-2013).zip"):
            unzip('zip2xls2csv', baseUrl + tgtFile, tgtFile, "")

    else:
        print('WARNING: no download/unpack rules defined for state "%s"' % state)
# ============================================================================= | |
# FTP File Processing: | |
# Pull a specified file (i.e. download the file) | |
# ============================================================================= | |
def ftp_download(url,
                 userName,
                 passWord,
                 ftpPath,
                 ftpFile,
                 tgtPath,
                 tgtFile):
    """Download one file from an FTP server.

    Parameters:
        url: host name of the FTP server (no "ftp://" prefix).
        userName: login name; an empty string requests an anonymous login.
        passWord: password that goes with userName.
        ftpPath: remote folder that holds the file.
        ftpFile: name of the remote file.
        tgtPath: existing local folder that receives the file.
        tgtFile: local file name.

    Connection, login, CWD and RETR failures are reported on stdout rather
    than raised.  A target file left incomplete by a failed RETR is removed.
    The control connection is shut down only when it was actually opened.

    Returns:
        True when the file was downloaded, False otherwise.
    """
    print("==========================")
    print("Pulling File from FTP Site")
    print("File: %s" % '/'.join([url, ftpPath, ftpFile]))

    ftp = ftplib.FTP()
    try:
        ftp.connect(url)
    except socket.error as e:
        print('ERROR: unable to connect to "%s"' % url)
        print('ERROR: FTP connection error "%s"' % e)
        # no connection exists, so there is nothing to quit
        return False

    try:
        try:
            if userName != "":
                ftp.login(userName, passWord)
            else:
                # login anonymously
                ftp.login()
        except ftplib.error_perm as e:
            if userName != "":
                print('ERROR: cannot login (bad user name or password) to "%s"' % url)
            else:
                print('ERROR: cannot login (anonymous login forbidden) to "%s"' % url)
            print('ERROR: FTP login error "%s"' % e)
            return False

        try:
            ftp.cwd(ftpPath)
        except ftplib.error_perm as e:
            print('ERROR: cannot CWD to "%s"' % ftpPath)
            print('ERROR: FTP CWD error "%s"' % e)
            return False

        # build the download target file path
        tgtFilePath = os.path.join(tgtPath, tgtFile)
        print("Download file to: %s" % tgtFilePath)
        try:
            # the context manager guarantees the local file is closed
            with open(tgtFilePath, 'wb') as tgtHandle:
                ftp.retrbinary("RETR %s" % ftpFile, tgtHandle.write)
        except ftplib.error_perm as e:
            print('ERROR: cannot read file "%s"' % ftpFile)
            print('ERROR: FTP RETR (binary) error "%s"' % e)
            # do not leave an empty/partial file that looks like a download
            if os.path.exists(tgtFilePath):
                os.remove(tgtFilePath)
            return False

        # success!
        print('SUCCESS: downloaded "%s" to "%s"' % (ftpFile, tgtFilePath))
        return True
    finally:
        try:
            ftp.quit()
        except ftplib.all_errors:
            # the server may already have dropped the connection
            ftp.close()
# ============================================================================= | |
# HTTP Download routine: | |
# ============================================================================= | |
def urllib_download(url, tgtPath, tgtFile):
    """Download url to ``tgtPath/tgtFile`` with urllib's urlretrieve().

    Parameters:
        url: source URL; any scheme urlretrieve() accepts.
        tgtPath: local folder that receives the file; it must already exist.
        tgtFile: local file name.

    A missing target folder and any urllib.error.URLError (HTTPError is a
    subclass of it) are reported on stdout instead of being raised or
    silently ignored.

    Returns:
        True when the file was downloaded, False otherwise.
    """
    # imported explicitly rather than relying on urllib.request having
    # loaded urllib.error as a side effect
    from urllib.error import URLError

    tgtFullPath = os.path.abspath(os.path.join(tgtPath, tgtFile))
    if not os.path.exists(tgtPath):
        print("urllib_download, tgtPath does not exist '%s'" % tgtPath)
        return False

    print("urllib_download, download start '%s'" % url)
    try:
        urlretrieve(url, tgtFullPath)
    except URLError as e:
        print("urllib_download, download FAILED '%s' (%s)" % (url, e))
        return False
    print("urllib_download, download ended '%s'" % url)
    return True
# ============================================================================= | |
# HTTP Download routine: | |
# ============================================================================= | |
def requests_download(url, tgtPath, tgtFile):
    """Download url to ``tgtPath/tgtFile`` with the "requests" package.

    The body is streamed to disk in 8 KiB chunks.

    Parameters:
        url: source URL.
        tgtPath: local folder that receives the file; it must already exist.
        tgtFile: local file name.

    A missing target folder, a transport error and an HTTP error status
    (4xx/5xx) are reported on stdout; an error page is never saved as data.
    The response is always closed.

    Returns:
        True when the file was downloaded, False otherwise.
    """
    tgtFullPath = os.path.abspath(os.path.join(tgtPath, tgtFile))
    if not os.path.exists(tgtPath):
        print("requests_download, tgtPath does not exist '%s'" % tgtPath)
        return False

    print("requests_download, download start '%s'" % url)
    try:
        r = requests.get(url, stream=True)
        try:
            # refuse to store the body of a 4xx/5xx answer
            r.raise_for_status()
            with open(tgtFullPath, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        finally:
            r.close()
    except requests.exceptions.RequestException as e:
        print("requests_download, download FAILED '%s' (%s)" % (url, e))
        return False
    print("requests_download, download ended '%s'" % url)
    return True
# ============================================================================= | |
# Test routine: | |
# ============================================================================= | |
def test_downloads():
    """Manual smoke test of both download helpers.

    Fetches ``data/ncvoter89.zip`` from ``alt.ncsbe.gov`` into ``/home/temp``
    twice: once with ftp_download() and once, via an ``ftp://`` URL, with
    urllib_download().  Needs network access; nothing is asserted, the
    helpers report their outcome on stdout.
    """
    tgtPath = '/home/temp'
    ftpUrl = 'alt.ncsbe.gov'
    ftpUsr = 'anonymous'
    ftpPwd = ''
    ftpPath = 'data'
    ftpFile = 'ncvoter89.zip'
    tgtFile = 'ncvoter89.zip'

    # os.makedirs creates missing parent folders as well; it replaces a call
    # to a mkdir_recursively helper that this module does not define
    os.makedirs(tgtPath, exist_ok=True)

    ftp_download(ftpUrl, ftpUsr, ftpPwd, ftpPath, ftpFile, tgtPath, tgtFile)

    httpUrl = 'ftp://alt.ncsbe.gov/data/ncvoter89.zip'
    urllib_download(httpUrl, tgtPath, tgtFile)
# ============================================================================ | |
# execute the mainline processing routine | |
# ============================================================================ | |
if "__main__" == __name__:
    # run the mainline only when executed as a script, not when imported
    retval = main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
''' | |
This Python script is oriented towards oil, gas, and fracking well | |
information download, extract, and output at the state-level within | |
the United States of America. Not every state is covered, because some
states either do not allow fracking or do not offer any data for download.
This Python script is required by another Python script entitled
PyFrackETL.py, which is also available as a Gist under the same name.
''' | |
import collections | |
import csv | |
import ogr | |
import os | |
import pyodbc | |
import shutil | |
import tempfile | |
import time | |
import xlrd | |
import zipfile | |
# from dbfpy import dbf | |
class Src2Tgt: | |
# Lookup table: each key is a Unicode character, each value is the single
# character whose code point equals that character's Windows-1252 byte
# value (0x80-0x9F range).
# NOTE(review): presumably used to repair text that was decoded with the
# wrong code page -- the consumer is not visible here, confirm.
cp1252 = {
    # from http://www.microsoft.com/typography/unicode/1252.htm
    u"\u20AC": u"\x80", # EURO SIGN
    u"\u201A": u"\x82", # SINGLE LOW-9 QUOTATION MARK
    u"\u0192": u"\x83", # LATIN SMALL LETTER F WITH HOOK
    u"\u201E": u"\x84", # DOUBLE LOW-9 QUOTATION MARK
    u"\u2026": u"\x85", # HORIZONTAL ELLIPSIS
    u"\u2020": u"\x86", # DAGGER
    u"\u2021": u"\x87", # DOUBLE DAGGER
    u"\u02C6": u"\x88", # MODIFIER LETTER CIRCUMFLEX ACCENT
    u"\u2030": u"\x89", # PER MILLE SIGN
    u"\u0160": u"\x8A", # LATIN CAPITAL LETTER S WITH CARON
    u"\u2039": u"\x8B", # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
    u"\u0152": u"\x8C", # LATIN CAPITAL LIGATURE OE
    u"\u017D": u"\x8E", # LATIN CAPITAL LETTER Z WITH CARON
    u"\u2018": u"\x91", # LEFT SINGLE QUOTATION MARK
    u"\u2019": u"\x92", # RIGHT SINGLE QUOTATION MARK
    u"\u201C": u"\x93", # LEFT DOUBLE QUOTATION MARK
    u"\u201D": u"\x94", # RIGHT DOUBLE QUOTATION MARK
    u"\u2022": u"\x95", # BULLET
    u"\u2013": u"\x96", # EN DASH
    u"\u2014": u"\x97", # EM DASH
    u"\u02DC": u"\x98", # SMALL TILDE
    u"\u2122": u"\x99", # TRADE MARK SIGN
    u"\u0161": u"\x9A", # LATIN SMALL LETTER S WITH CARON
    u"\u203A": u"\x9B", # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
    u"\u0153": u"\x9C", # LATIN SMALL LIGATURE OE
    u"\u017E": u"\x9E", # LATIN SMALL LETTER Z WITH CARON
    u"\u0178": u"\x9F", # LATIN CAPITAL LETTER Y WITH DIAERESIS
}
def __init__(self): | |
return | |
# ------------------------------------------------------------------------- | |
# define the CSV to KML file method | |
# ------------------------------------------------------------------------- | |
def csv2kml(self, csvFile, tgtPath, kmlFile, maxRecords, flushCount):
    """Convert a CSV file whose last column holds geometry text into a KML file.

    The first CSV row is the header.  Every column except the last becomes
    a KML attribute field; the last column (named 'kmlgeometry' by the
    SHP-to-CSV methods of this class) is parsed into the feature geometry.

    Parameters:
        csvFile    -- source CSV path (a leading '~' is expanded).
        tgtPath    -- folder used when kmlFile is empty or has no folder part.
        kmlFile    -- KML path to create; when empty the name is derived
                      from csvFile ('.csv' / '.txt' replaced by '.kml').
        maxRecords -- stop after this many rows; 0 or less means no limit.
        flushCount -- print a progress line every flushCount rows; 0 or
                      less disables progress lines.

    Returns None.  A missing or empty source file is reported on stdout and
    nothing is converted.
    """
    print("")
    print("========================")
    print("CSV to KML conversion...")
    print("------------------------")

    # expand any leading tilde
    # to the user's home path
    csvFile = os.path.expanduser(csvFile)
    kmlFile = os.path.expanduser(kmlFile)

    # verify that CSV file exists
    if not os.path.exists(csvFile):
        print("CSV file '%s' does NOT exist!" % csvFile)
        return

    print("csvFile = %s" % csvFile)

    # resolve the final KML path first, so the folder check
    # below also covers a name derived from the CSV file
    if kmlFile == "":
        kmlFile = os.path.join(tgtPath, os.path.basename(csvFile).lower().replace('.csv','.kml').replace('.txt','.kml'))
    if os.path.dirname(kmlFile) == "":
        kmlFile = os.path.join(tgtPath, kmlFile)
    print("kmlFile = %s" % kmlFile)

    # make sure the target folder exists,
    # creating it recursively if it does not
    kmlFldr = os.path.dirname(kmlFile)
    if kmlFldr != "":
        os.makedirs(kmlFldr, exist_ok=True)

    def _showRate(rowCount, startTime):
        # a coarse clock can report zero elapsed time; avoid dividing by it
        elapsed = time.time() - startTime
        rate = rowCount / elapsed if elapsed > 0 else 0.0
        print("Rows: {:,} @ {:,.0f} records/second".format(rowCount, rate))

    with open(csvFile, 'r') as fReader:
        csvreader = csv.reader(fReader)

        # Python 3 readers have no .next() method; use the builtin
        headers = next(csvreader, None)
        if headers is None:
            print("CSV file '%s' is empty!" % csvFile)
            return
        fields = headers[:-1]  # skip kmlgeometry (assumed to be in last column)

        ogr.UseExceptions()
        kmlDs = ogr.GetDriverByName('KML').CreateDataSource(kmlFile)
        kmlLyr = kmlDs.CreateLayer(os.path.splitext(os.path.basename(kmlFile))[0])
        for field in fields:
            kmlLyr.CreateField(ogr.FieldDefn(field))

        bgnTime = time.time()
        rows = 0
        for rec in csvreader:
            # csv.reader yields an empty list for a blank line
            if not rec:
                continue
            feat = ogr.Feature(kmlLyr.GetLayerDefn())
            for i, field in enumerate(fields):
                feat.SetField(field, rec[i])
            # shp2csv writes an empty geometry cell for features
            # without geometry; leave those features geometry-less
            if rec[-1]:
                feat.SetGeometry(ogr.CreateGeometryFromGML(rec[-1]))
            kmlLyr.CreateFeature(feat)
            rows += 1
            if flushCount > 0 and rows % flushCount == 0:
                _showRate(rows, bgnTime)
            if maxRecords > 0 and rows >= maxRecords:
                break

        # clean up: drop the OGR references
        # (presumably this is what finalises the KML output -- confirm)
        del kmlLyr, kmlDs

    print("-----------------------------------")
    _showRate(rows, bgnTime)
    print("")
    return
# ------------------------------------------------------------------------- | |
# define the DBF file to CSV file method | |
# ------------------------------------------------------------------------- | |
def dbf2csv(self, dbfFile, tgtPath, csvFile, maxRecords, flushCount, csvDelimiter=','):
    """Convert a DBF file into a CSV file with a header row of field names.

    Parameters:
        dbfFile      -- source DBF path (a leading '~' is expanded).
        tgtPath      -- folder used when csvFile is empty or has no folder part.
        csvFile      -- CSV path to create; when empty the name is derived
                        from dbfFile ('.dbf' replaced by '.csv').
        maxRecords   -- stop after this many rows; 0 or less means no limit.
        flushCount   -- flush the output and print a progress line every
                        flushCount rows; 0 or less disables both.
        csvDelimiter -- field delimiter of the CSV output.

    Returns None.  A missing source file, or a missing dbfpy module, is
    reported on stdout and nothing is converted.
    """
    print("")
    print("=============================")
    print("DBF to CSV file conversion...")
    print("-----------------------------")
    print("dbfFile '%s'" % dbfFile)
    print("csvFile '%s'" % csvFile)

    # expand any leading tilde
    # to the user's home path
    dbfFile = os.path.expanduser(dbfFile)
    csvFile = os.path.expanduser(csvFile)

    # verify that DBF file exists
    if not os.path.exists(dbfFile):
        print("DBF file '%s' does NOT exist!" % dbfFile)
        return

    # This method relies on dbfpy's 'dbf' module, whose file-level import
    # is commented out; import it here so a missing module is reported
    # instead of surfacing as a NameError.
    try:
        from dbfpy import dbf
    except ImportError:
        print("Module 'dbfpy' is NOT available; DBF file '%s' was NOT converted!" % dbfFile)
        return

    # resolve the final CSV path first, so the folder check
    # below also covers a name derived from the DBF file
    if csvFile == "":
        csvFile = os.path.join(tgtPath, os.path.basename(dbfFile).lower().replace('.dbf','.csv'))
    if os.path.dirname(csvFile) == "":
        csvFile = os.path.join(tgtPath, csvFile)
    print("csvFile = %s" % csvFile)

    # make sure the CSV target folder exists,
    # creating it recursively if it does not
    csvFldr = os.path.dirname(csvFile)
    if csvFldr != "":
        os.makedirs(csvFldr, exist_ok=True)

    def _showRate(rowCount, startTime):
        # a coarse clock can report zero elapsed time; avoid dividing by it
        elapsed = time.time() - startTime
        rate = rowCount / elapsed if elapsed > 0 else 0.0
        print("Rows: {:,} @ {:,.0f} records/second".format(rowCount, rate))

    rows = 0
    bgnTime = time.time()
    dbffile = dbf.Dbf(dbfFile)
    try:
        # the header row holds the field names, not the field definitions
        colNames = [fld.name for fld in dbffile.header.fields]

        with open(csvFile, 'w', newline='') as fWriter:
            # the factory function is csv.writer (lower case)
            csvwriter = csv.writer(fWriter, delimiter=csvDelimiter)
            csvwriter.writerow(colNames)

            bgnTime = time.time()
            for row in dbffile:
                csvwriter.writerow(row.asList())
                rows += 1
                if flushCount > 0 and rows % flushCount == 0:
                    fWriter.flush()
                    _showRate(rows, bgnTime)
                if maxRecords > 0 and rows >= maxRecords:
                    break
    finally:
        # clean up, also when the conversion fails part-way
        dbffile.close()

    print("-----------------------------------")
    _showRate(rows, bgnTime)
    print("")
    return
# ------------------------------------------------------------------------- | |
# define the KML to CSV file method | |
# ------------------------------------------------------------------------- | |
def kml2csv(self, kmlFile, tgtPath, csvFile, maxRecords, flushCount):
    """Placeholder for a KML to CSV conversion: does nothing and returns None."""
    return None
# ------------------------------------------------------------------------- | |
# define the MDB to CSV file method | |
# ------------------------------------------------------------------------- | |
def mdb2csv(self, mdbFile, tgtPath, csvFile, tblName, maxRecords, flushCount, csvDelimiter=','):
    """Export one table of a Microsoft Access database into a CSV file.

    Parameters:
        mdbFile      -- source .mdb / .accdb path (a leading '~' is expanded).
        tgtPath      -- folder used when csvFile is empty or has no folder part.
        csvFile      -- CSV path to create; when empty the name is derived
                        from mdbFile ('.mdb' / '.accdb' replaced by '.csv').
        tblName      -- name of the table to export.  It is interpolated
                        into the SQL text (table names cannot be bound as
                        parameters), so it must come from a trusted source.
        maxRecords   -- stop after this many rows; 0 or less means no limit.
        flushCount   -- print a progress line every flushCount rows; 0 or
                        less disables progress lines.
        csvDelimiter -- field delimiter of the CSV output.

    Returns None.  A missing source file is reported on stdout.  Errors
    raised by pyodbc propagate to the caller.
    """
    print("")
    print("=============================")
    print("MDB to CSV file conversion...")
    print("-----------------------------")
    print("mdbFile '%s'" % mdbFile)
    print("csvFile '%s'" % csvFile)
    print("tblName '%s'" % tblName)

    # expand any leading tilde
    # to the user's home path
    mdbFile = os.path.expanduser(mdbFile)
    csvFile = os.path.expanduser(csvFile)

    # verify that MDB file exists
    if not os.path.exists(mdbFile):
        print("MDB file '%s' does NOT exist!" % mdbFile)
        return

    # resolve the final CSV path first, so the folder check
    # below also covers a name derived from the MDB file
    if csvFile == "":
        csvFile = os.path.join(tgtPath, os.path.basename(mdbFile).lower().replace('.mdb','.csv').replace('.accdb','.csv'))
    if os.path.dirname(csvFile) == "":
        csvFile = os.path.join(tgtPath, csvFile)
    print("csvFile = %s" % csvFile)

    # make sure the CSV target folder exists,
    # creating it recursively if it does not
    csvFldr = os.path.dirname(csvFile)
    if csvFldr != "":
        os.makedirs(csvFldr, exist_ok=True)

    def _showRate(rowCount, startTime):
        # a coarse clock can report zero elapsed time; avoid dividing by it
        elapsed = time.time() - startTime
        rate = rowCount / elapsed if elapsed > 0 else 0.0
        print("Rows: {:,} @ {:,.0f} records/second".format(rowCount, rate))

    bgnTime = time.time()
    mdbFile = os.path.abspath(mdbFile)

    # the '*.mdb' driver cannot open .accdb files; those need the newer
    # driver, while .mdb files keep the original connection string
    if mdbFile.lower().endswith('.accdb'):
        driver = "Microsoft Access Driver (*.mdb, *.accdb)"
    else:
        driver = "Microsoft Access Driver (*.mdb)"
    connString = "Driver={%s};DBQ=%s;" % (driver, mdbFile)
    print("pyODBC connString: %s" % connString)

    rows = 0
    conn = pyodbc.connect(connString)
    try:
        cursor = conn.cursor()

        # column names of the table, in the order the catalog reports them
        colNames = [col.column_name for col in cursor.columns(table='%s' % tblName)]

        # the with-block closes (and thereby flushes) the CSV file,
        # which was previously never closed
        with open(csvFile, 'w', newline='') as fWriter:
            csvwriter = csv.DictWriter(fWriter, delimiter=csvDelimiter, fieldnames=colNames)
            csvwriter.writeheader()

            cursor.execute("select * from [%s]" % tblName)
            for row in cursor:
                csvwriter.writerow(dict(zip(colNames, row)))
                rows += 1
                if flushCount > 0 and rows % flushCount == 0:
                    _showRate(rows, bgnTime)
                if maxRecords > 0 and rows >= maxRecords:
                    break
    finally:
        # release the connection also when the export fails part-way
        conn.close()

    print("-----------------------------------")
    _showRate(rows, bgnTime)
    print("")
    return
# ------------------------------------------------------------------------- | |
# define the PDF to CSV file method
# ------------------------------------------------------------------------- | |
def pdf2csv(self, pdfFile, tgtPath, csvFile, maxRecords, flushCount):
    """Placeholder for a PDF to CSV conversion: does nothing and returns None."""
    return None
# ------------------------------------------------------------------------- | |
# define the SHP to CSV file method | |
# ------------------------------------------------------------------------- | |
def shp2csv(self, shpFile, tgtPath, csvFile, maxRecords, flushCount):
    """Convert a shapefile into a CSV file with a trailing 'kmlgeometry' column.

    Each feature becomes one CSV row holding its attribute values plus the
    geometry exported as KML text (empty when the feature has no geometry).

    Parameters:
        shpFile    -- source path; must exist and end with '.shp'
                      (a leading '~' is expanded).
        tgtPath    -- folder used when csvFile is empty or has no folder part.
        csvFile    -- CSV path to create; when empty the name is derived
                      from shpFile ('.shp' replaced by '.csv').
        maxRecords -- stop after this many features; 0 or less means no limit.
        flushCount -- flush the output and print a progress line every
                      flushCount features; 0 or less disables both.

    Returns None.  An unusable source file is reported on stdout.  Rows that
    cannot be encoded for the output file are reported and skipped, but
    still counted.
    """
    print("")
    print("========================")
    print("SHP to CSV conversion...")
    print("------------------------")

    # expand any leading tilde
    # to the user's home path
    shpFile = os.path.expanduser(shpFile)
    csvFile = os.path.expanduser(csvFile)

    # verify that SHP file exists
    if not os.path.exists(shpFile):
        print("SHP file '%s' does NOT exist!" % shpFile)
        return
    if not shpFile.lower().endswith(".shp"):
        print("SHP file name '%s' does NOT end with '.shp'!" % shpFile)
        return

    print("shpFile = %s" % shpFile)

    # resolve the final CSV path first, so the folder check
    # below also covers a name derived from the SHP file
    if csvFile == "":
        csvFile = os.path.join(tgtPath, os.path.basename(shpFile).lower().replace('.shp','.csv'))
    if os.path.dirname(csvFile) == "":
        csvFile = os.path.join(tgtPath, csvFile)
    print("csvFile = %s" % csvFile)

    # open the source before creating any output; ogr.Open returns
    # None on failure unless OGR exceptions have been enabled
    shpDs = ogr.Open(shpFile)
    if shpDs is None:
        print("SHP file '%s' could NOT be opened!" % shpFile)
        return
    shpLayer = shpDs.GetLayer()

    # Get field names
    shpDfn = shpLayer.GetLayerDefn()
    headers = [shpDfn.GetFieldDefn(i).GetName() for i in range(shpDfn.GetFieldCount())]
    headers.append('kmlgeometry')

    # make sure the target folder exists,
    # creating it recursively if it does not
    csvFldr = os.path.dirname(csvFile)
    if csvFldr != "":
        os.makedirs(csvFldr, exist_ok=True)

    def _showRate(rowCount, startTime):
        # a coarse clock can report zero elapsed time; avoid dividing by it
        elapsed = time.time() - startTime
        rate = rowCount / elapsed if elapsed > 0 else 0.0
        print("Rows: {:,} @ {:,.0f} records/second".format(rowCount, rate))

    rows = 0
    with open(csvFile, 'w', newline='') as csvfile:
        csvwriter = csv.DictWriter(csvfile, headers)
        csvwriter.writeheader()

        bgnTime = time.time()

        # Write attributes and kml geometry out to csv
        for shpFeat in shpLayer:
            attributes = shpFeat.items()
            shpGeom = shpFeat.GetGeometryRef()
            # a feature without geometry gets an empty geometry cell
            attributes['kmlgeometry'] = shpGeom.ExportToKML() if shpGeom is not None else ""
            try:
                csvwriter.writerow(attributes)
            except UnicodeEncodeError as e:
                # best effort: report the row that could not be encoded
                print(e)
            rows += 1
            if flushCount > 0 and rows % flushCount == 0:
                csvfile.flush()
                _showRate(rows, bgnTime)
            if maxRecords > 0 and rows >= maxRecords:
                break

    # clean up
    del shpLayer, shpDs

    print("-----------------------------------")
    _showRate(rows, bgnTime)
    print("")
    return
# ------------------------------------------------------------------------- | |
# define the SHP to KML file method | |
# ------------------------------------------------------------------------- | |
def shp2kml(self, shpFile, tgtPath, kmlFile, maxRecords, flushCount):
    """Convert a shapefile into a KML file, copying attributes and geometry.

    Parameters:
        shpFile    -- source path; must exist and end with '.shp'
                      (a leading '~' is expanded).
        tgtPath    -- folder used when kmlFile is empty or has no folder part.
        kmlFile    -- KML path to create; when empty the name is derived
                      from shpFile ('.shp' replaced by '.kml').
        maxRecords -- stop after this many features; 0 or less means no limit.
        flushCount -- print a progress line every flushCount features; 0 or
                      less disables progress lines.

    Returns None.  An unusable source file is reported on stdout.
    """
    print("")
    print("========================")
    print("SHP to KML conversion...")
    print("------------------------")

    # expand any leading tilde
    # to the user's home path
    shpFile = os.path.expanduser(shpFile)
    kmlFile = os.path.expanduser(kmlFile)

    # verify that SHP file exists
    if not os.path.exists(shpFile):
        print("SHP file '%s' does NOT exist!" % shpFile)
        return
    if not shpFile.lower().endswith(".shp"):
        print("SHP file name '%s' does NOT end with '.shp'!" % shpFile)
        return

    print("shpFile = %s" % shpFile)

    # resolve the final KML path first, so the folder check
    # below also covers a name derived from the SHP file
    if kmlFile == "":
        kmlFile = os.path.join(tgtPath, os.path.basename(shpFile).lower().replace('.shp','.kml'))
    if os.path.dirname(kmlFile) == "":
        kmlFile = os.path.join(tgtPath, kmlFile)
    print("kmlFile = %s" % kmlFile)

    # open the source before creating any output; ogr.Open returns
    # None on failure unless OGR exceptions have been enabled
    shpDs = ogr.Open(shpFile)
    if shpDs is None:
        print("SHP file '%s' could NOT be opened!" % shpFile)
        return
    shpLayer = shpDs.GetLayer()

    # make sure the KML target folder exists,
    # creating it recursively if it does not
    kmlFldr = os.path.dirname(kmlFile)
    if kmlFldr != "":
        os.makedirs(kmlFldr, exist_ok=True)

    kmlDs = ogr.GetDriverByName('KML').CreateDataSource(kmlFile)
    kmlLayer = kmlDs.CreateLayer(os.path.splitext(os.path.basename(kmlFile))[0])

    # Get field names and mirror every source field in the KML layer
    shpDfn = shpLayer.GetLayerDefn()
    fields = [shpDfn.GetFieldDefn(i).GetName() for i in range(shpDfn.GetFieldCount())]
    for field in fields:
        kmlLayer.CreateField(ogr.FieldDefn(field))

    def _showRate(rowCount, startTime):
        # a coarse clock can report zero elapsed time; avoid dividing by it
        elapsed = time.time() - startTime
        rate = rowCount / elapsed if elapsed > 0 else 0.0
        print("Rows: {:,} @ {:,.0f} records/second".format(rowCount, rate))

    bgnTime = time.time()

    # Write attributes and geometry out to kml
    rows = 0
    for shpFeat in shpLayer:
        attributes = shpFeat.items()
        shpGeom = shpFeat.GetGeometryRef()
        kmlFeat = ogr.Feature(kmlLayer.GetLayerDefn())
        for field in fields:
            kmlFeat.SetField(field, attributes[field])
        # a feature without geometry is written with attributes
        # only (shp2csv tolerates such features as well)
        if shpGeom is not None:
            kmlFeat.SetGeometry(ogr.CreateGeometryFromGML(shpGeom.ExportToKML()))
        kmlLayer.CreateFeature(kmlFeat)
        rows += 1
        if flushCount > 0 and rows % flushCount == 0:
            _showRate(rows, bgnTime)
        if maxRecords > 0 and rows >= maxRecords:
            break

    # clean up
    del shpLayer, shpDs, kmlLayer, kmlDs

    print("-----------------------------------")
    _showRate(rows, bgnTime)
    print("")
    return
# ------------------------------------------------------------------------- | |
# define the SHP to KML and CSV files method | |
# ------------------------------------------------------------------------- | |
def shp2kmlcsv(self, shpFile, tgtPath, kmlFile, csvFile, maxRecords, flushCount):
    """Convert a shapefile into both a KML file and a CSV file in one pass.

    The CSV rows hold the attribute values plus a trailing 'kmlgeometry'
    column with the geometry exported as KML text (empty when the feature
    has no geometry); the KML file receives the same attributes and geometry.

    Parameters:
        shpFile    -- source path; must exist and end with '.shp'
                      (a leading '~' is expanded).
        tgtPath    -- folder used when kmlFile / csvFile is empty or has
                      no folder part.
        kmlFile    -- KML path to create; when empty the name is derived
                      from shpFile ('.shp' replaced by '.kml').
        csvFile    -- CSV path to create; when empty the name is derived
                      from shpFile ('.shp' replaced by '.csv').
        maxRecords -- stop after this many features; 0 or less means no limit.
        flushCount -- flush the CSV and print a progress line every
                      flushCount features; 0 or less disables both.

    Returns None.  An unusable source file is reported on stdout.
    """
    print("")
    print("================================")
    print("SHP to KML and CSV conversion...")
    print("--------------------------------")

    # expand any leading tilde
    # to the user's home path
    shpFile = os.path.expanduser(shpFile)
    kmlFile = os.path.expanduser(kmlFile)
    csvFile = os.path.expanduser(csvFile)

    # verify that SHP file exists
    if not os.path.exists(shpFile):
        print("SHP file '%s' does NOT exist!" % shpFile)
        return
    if not shpFile.lower().endswith(".shp"):
        print("SHP file name '%s' does NOT end with '.shp'!" % shpFile)
        return

    print("shpFile = %s" % shpFile)

    # resolve the final KML and CSV paths first, so the folder checks
    # below also cover names derived from the SHP file
    shpName = os.path.basename(shpFile).lower()
    if kmlFile == "":
        kmlFile = os.path.join(tgtPath, shpName.replace('.shp','.kml'))
    if os.path.dirname(kmlFile) == "":
        kmlFile = os.path.join(tgtPath, kmlFile)
    print("kmlFile = %s" % kmlFile)

    if csvFile == "":
        csvFile = os.path.join(tgtPath, shpName.replace('.shp','.csv'))
    if os.path.dirname(csvFile) == "":
        csvFile = os.path.join(tgtPath, csvFile)
    print("csvFile = %s" % csvFile)

    # open the source before creating any output; ogr.Open returns
    # None on failure unless OGR exceptions have been enabled
    shpDs = ogr.Open(shpFile)
    if shpDs is None:
        print("SHP file '%s' could NOT be opened!" % shpFile)
        return
    shpLayer = shpDs.GetLayer()

    # make sure the KML and CSV target folders exist,
    # creating them recursively if they do not
    for outFile in (kmlFile, csvFile):
        outFldr = os.path.dirname(outFile)
        if outFldr != "":
            os.makedirs(outFldr, exist_ok=True)

    kmlDs = ogr.GetDriverByName('KML').CreateDataSource(kmlFile)
    kmlLayer = kmlDs.CreateLayer(os.path.splitext(os.path.basename(kmlFile))[0])

    # Get field names and mirror every source field in the KML layer
    shpDfn = shpLayer.GetLayerDefn()
    fields = [shpDfn.GetFieldDefn(i).GetName() for i in range(shpDfn.GetFieldCount())]
    for field in fields:
        kmlLayer.CreateField(ogr.FieldDefn(field))
    headers = fields + ['kmlgeometry']

    def _showRate(rowCount, startTime):
        # a coarse clock can report zero elapsed time; avoid dividing by it
        elapsed = time.time() - startTime
        rate = rowCount / elapsed if elapsed > 0 else 0.0
        print("Rows: {:,} @ {:,.0f} records/second".format(rowCount, rate))

    rows = 0
    with open(csvFile, 'w', newline='') as csvfile:
        csvwriter = csv.DictWriter(csvfile, headers)
        # the old fallback wrote encoded bytes to this
        # text-mode file, which could only fail
        csvwriter.writeheader()

        bgnTime = time.time()

        # Write attributes and kml out to csv
        for shpFeat in shpLayer:
            attributes = shpFeat.items()
            shpGeom = shpFeat.GetGeometryRef()
            # a feature without geometry gets an empty geometry cell
            attributes['kmlgeometry'] = shpGeom.ExportToKML() if shpGeom is not None else ""
            csvwriter.writerow(attributes)

            kmlFeat = ogr.Feature(kmlLayer.GetLayerDefn())
            for field in fields:
                kmlFeat.SetField(field, attributes[field])
            if attributes['kmlgeometry']:
                kmlFeat.SetGeometry(ogr.CreateGeometryFromGML(attributes['kmlgeometry']))
            kmlLayer.CreateFeature(kmlFeat)

            rows += 1
            if flushCount > 0 and rows % flushCount == 0:
                csvfile.flush()
                _showRate(rows, bgnTime)
            if maxRecords > 0 and rows >= maxRecords:
                break

    # clean up
    del shpLayer, shpDs, kmlLayer, kmlDs

    print("-----------------------------------")
    _showRate(rows, bgnTime)
    print("")
    return
# ------------------------------------------------------------------------- | |
# define the TAB-delimited file to CSV file method | |
# ------------------------------------------------------------------------- | |
def tab2csv(self, tabFile, tgtPath, csvFile, maxRecords, flushCount, tabDelimiter='\t', csvDelimiter=','):
    """Convert a TAB-delimited text file into a CSV file.

    The first input row is the header.  Every value is stripped and its
    internal runs of whitespace are collapsed to single spaces.  Columns
    missing from a short row are written as empty strings.

    Parameters:
        tabFile      -- source path (a leading '~' is expanded).
        tgtPath      -- folder used when csvFile is empty or has no folder part.
        csvFile      -- CSV path to create; when empty the name is derived
                        from tabFile ('.tab' / '.txt' replaced by '.csv').
        maxRecords   -- stop after this many rows; 0 or less means no limit.
        flushCount   -- flush the output and print a progress line every
                        flushCount rows; 0 or less disables both.
        tabDelimiter -- field delimiter of the input file.
        csvDelimiter -- field delimiter of the CSV output.

    Returns None.  A missing source file is reported on stdout.  An empty
    source file produces an empty CSV file.
    """
    print("")
    print("=============================")
    print("TAB to CSV file conversion...")
    print("-----------------------------")
    print("tabFile '%s'" % tabFile)
    print("csvFile '%s'" % csvFile)

    # expand any leading tilde
    # to the user's home path
    tabFile = os.path.expanduser(tabFile)
    csvFile = os.path.expanduser(csvFile)

    # verify that TAB file exists
    if not os.path.exists(tabFile):
        print("TAB file '%s' does NOT exist!" % tabFile)
        return

    # resolve the final CSV path first, so the folder check
    # below also covers a name derived from the TAB file
    if csvFile == "":
        csvFile = os.path.join(tgtPath, os.path.basename(tabFile).lower().replace('.tab','.csv').replace('.txt','.csv'))
    if os.path.dirname(csvFile) == "":
        csvFile = os.path.join(tgtPath, csvFile)
    print("csvFile = %s" % csvFile)

    # make sure the CSV target folder exists,
    # creating it recursively if it does not
    csvFldr = os.path.dirname(csvFile)
    if csvFldr != "":
        os.makedirs(csvFldr, exist_ok=True)

    def _showRate(rowCount, startTime):
        # a coarse clock can report zero elapsed time; avoid dividing by it
        elapsed = time.time() - startTime
        rate = rowCount / elapsed if elapsed > 0 else 0.0
        print("Rows: {:,} @ {:,.0f} records/second".format(rowCount, rate))

    rows = 0
    # the with-block closes both files even when a row fails
    with open(tabFile, 'r') as fReader, open(csvFile, 'w', newline='') as fWriter:
        csvreader = csv.DictReader(fReader, delimiter=tabDelimiter)

        # fieldnames is None for an empty input file
        fieldNames = csvreader.fieldnames or []
        csvwriter = csv.DictWriter(fWriter, delimiter=csvDelimiter, fieldnames=fieldNames)
        if fieldNames:
            csvwriter.writeheader()

        bgnTime = time.time()
        for rowDict in csvreader:
            cols = {}
            for colName in fieldNames:
                try:
                    cols[colName] = ' '.join(rowDict[colName].strip().split())
                except AttributeError:
                    # short row: DictReader fills the missing
                    # trailing columns with None (as txt2csv handles)
                    cols[colName] = ''
            csvwriter.writerow(cols)
            rows += 1
            if flushCount > 0 and rows % flushCount == 0:
                fWriter.flush()
                _showRate(rows, bgnTime)
            if maxRecords > 0 and rows >= maxRecords:
                break

    print("-----------------------------------")
    _showRate(rows, bgnTime)
    print("")
    return
# ------------------------------------------------------------------------- | |
# define the TXT file to CSV file method | |
# ------------------------------------------------------------------------- | |
def txt2csv(self, txtFile, tgtPath, csvFile, maxRecords, flushCount, txtDelimiter=',', csvDelimiter=','):
    """Convert a delimited text file into a CSV file.

    The first input row is the header.  Every value is stripped and its
    internal runs of whitespace are collapsed to single spaces.  Columns
    missing from a short row are reported on stdout and written as empty
    strings.

    Parameters:
        txtFile      -- source path (a leading '~' is expanded).
        tgtPath      -- folder used when csvFile is empty or has no folder part.
        csvFile      -- CSV path to create; when empty the name is derived
                        from txtFile ('.txt' replaced by '.csv').
        maxRecords   -- stop after this many rows; 0 or less means no limit.
        flushCount   -- flush the output and print a progress line every
                        flushCount rows; 0 or less disables both.
        txtDelimiter -- field delimiter of the input file.
        csvDelimiter -- field delimiter of the CSV output.

    Returns None.  A missing source file is reported on stdout.  An empty
    source file produces an empty CSV file.
    """
    print("")
    print("=============================")
    print("TXT to CSV file conversion...")
    print("-----------------------------")
    print("txtFile '%s'" % txtFile)
    print("csvFile '%s'" % csvFile)

    # expand any leading tilde
    # to the user's home path
    txtFile = os.path.expanduser(txtFile)
    csvFile = os.path.expanduser(csvFile)

    # verify that TXT file exists
    if not os.path.exists(txtFile):
        print("TXT file '%s' does NOT exist!" % txtFile)
        return

    # resolve the final CSV path first, so the folder check
    # below also covers a name derived from the TXT file
    if csvFile == "":
        csvFile = os.path.join(tgtPath, os.path.basename(txtFile).lower().replace('.txt','.csv'))
    if os.path.dirname(csvFile) == "":
        csvFile = os.path.join(tgtPath, csvFile)
    print("csvFile = %s" % csvFile)

    # make sure the CSV target folder exists,
    # creating it recursively if it does not
    csvFldr = os.path.dirname(csvFile)
    if csvFldr != "":
        os.makedirs(csvFldr, exist_ok=True)

    def _showRate(rowCount, startTime):
        # a coarse clock can report zero elapsed time; avoid dividing by it
        elapsed = time.time() - startTime
        rate = rowCount / elapsed if elapsed > 0 else 0.0
        print("Rows: {:,} @ {:,.0f} records/second".format(rowCount, rate))

    rows = 0
    # the with-block closes both files even when a row fails
    with open(txtFile, 'r') as fReader, open(csvFile, 'w', newline='') as fWriter:
        csvreader = csv.DictReader(fReader, delimiter=txtDelimiter)

        # fieldnames is None for an empty input file
        fieldNames = csvreader.fieldnames or []
        csvwriter = csv.DictWriter(fWriter, delimiter=csvDelimiter, fieldnames=fieldNames)
        if fieldNames:
            csvwriter.writeheader()

        bgnTime = time.time()
        for rowDict in csvreader:
            cols = {}
            for colName in fieldNames:
                try:
                    cols[colName] = ' '.join(rowDict[colName].strip().split())
                except AttributeError as e:
                    # short row: DictReader fills the missing
                    # trailing columns with None
                    cols[colName] = ''
                    print(e)
            csvwriter.writerow(cols)
            rows += 1
            if flushCount > 0 and rows % flushCount == 0:
                fWriter.flush()
                _showRate(rows, bgnTime)
            if maxRecords > 0 and rows >= maxRecords:
                break

    print("-----------------------------------")
    _showRate(rows, bgnTime)
    print("")
    return
# ------------------------------------------------------------------------- | |
# define the XLS to CSV file method | |
# ------------------------------------------------------------------------- | |
def xls2csv(self, xlsFile, tgtPath, csvFile, maxRecords, flushCount):
    """Convert the first sheet of an Excel workbook into a CSV file.

    Every cell value is converted with str() and stripped before it is
    written.

    Parameters:
        xlsFile    -- source workbook path (a leading '~' is expanded).
        tgtPath    -- folder used when csvFile is empty or has no folder part.
        csvFile    -- CSV path to create; when empty the name is derived
                      from xlsFile (extension replaced by '.csv').
        maxRecords -- stop after this many rows; 0 or less means no limit.
        flushCount -- flush the output and print a progress line every
                      flushCount rows; 0 or less disables both.

    Returns None.  A missing source file is reported on stdout.  Rows that
    cannot be encoded for the output file are reported and skipped, but
    still counted.
    """
    print("")
    print("=============================")
    print("XLS to CSV file conversion...")
    print("-----------------------------")
    print("xlsFile '%s'" % xlsFile)
    print("csvFile '%s'" % csvFile)

    # expand any leading tilde
    # to the user's home path
    xlsFile = os.path.expanduser(xlsFile)
    csvFile = os.path.expanduser(csvFile)

    # verify that XLS file exists
    if not os.path.exists(xlsFile):
        print("XLS file '%s' does NOT exist!" % xlsFile)
        return

    # resolve the final CSV path first, so the folder check
    # below also covers a name derived from the XLS file
    if csvFile == "":
        xlsName = os.path.basename(xlsFile).lower()
        stem, ext = os.path.splitext(xlsName)
        if ext.startswith('.xls'):
            # swap the whole extension: a plain text replace
            # would turn 'wells.xlsx' into 'wells.csvx'
            csvName = stem + '.csv'
        else:
            csvName = xlsName.replace('.xls','.csv')
        csvFile = os.path.join(tgtPath, csvName)
    if os.path.dirname(csvFile) == "":
        csvFile = os.path.join(tgtPath, csvFile)
    print("csvFile = %s" % csvFile)

    xlsWB = xlrd.open_workbook(xlsFile)
    xlsSH = xlsWB.sheet_by_index(0)
    nbrRows = xlsSH.nrows

    # make sure the CSV target folder exists,
    # creating it recursively if it does not
    csvFldr = os.path.dirname(csvFile)
    if csvFldr != "":
        os.makedirs(csvFldr, exist_ok=True)

    def _showRate(rowCount, startTime):
        # a coarse clock can report zero elapsed time; avoid dividing by it
        elapsed = time.time() - startTime
        rate = rowCount / elapsed if elapsed > 0 else 0.0
        print("Rows: {:,} @ {:,.0f} records/second".format(rowCount, rate))

    rows = 0
    # the with-block closes the output even when a row fails
    with open(csvFile, 'w', newline='') as fWriter:
        csvwriter = csv.writer(fWriter, delimiter=',')

        bgnTime = time.time()
        for rowNbr in range(nbrRows):
            values = [str(entry).strip() for entry in xlsSH.row_values(rowNbr)]
            try:
                csvwriter.writerow(values)
            except UnicodeEncodeError as e:
                # best effort: report the row that could not be encoded
                print(e)
            rows += 1
            if flushCount > 0 and rows % flushCount == 0:
                fWriter.flush()
                _showRate(rows, bgnTime)
            if maxRecords > 0 and rows >= maxRecords:
                break

    print("-----------------------------------")
    _showRate(rows, bgnTime)
    print("")
    return
# ------------------------------------------------------------------------- | |
# define the unZIP a file to a target folder method | |
# ------------------------------------------------------------------------- | |
def zip2dir(self, zipFile, tgtPath):
    """Extract every member of a ZIP archive into a target folder.

    Parameters:
        zipFile -- path of the ZIP archive (a leading '~' is expanded).
        tgtPath -- folder to extract into (a leading '~' is expanded);
                   created recursively when it does not exist.

    Returns None.  A missing archive is reported on stdout and nothing
    is extracted.
    """
    for bannerLine in ("",
                       "=================================",
                       "ZIP to TGT directory expansion...",
                       "---------------------------------"):
        print(bannerLine)
    print("zipFile '%s'" % zipFile)
    print("tgtPath '%s'" % tgtPath)

    # resolve a leading '~' in either path
    archivePath = os.path.expanduser(zipFile)
    outFolder = os.path.expanduser(tgtPath)

    if os.path.exists(archivePath):
        # the target folder is created on demand
        if not os.path.exists(outFolder):
            os.makedirs(outFolder)

        # unpack all files/folders of the archive
        with zipfile.ZipFile(archivePath, "r") as archive:
            archive.extractall(outFolder)
    else:
        print ("ZIP file '%s' does NOT exist!" % archivePath)

    return
# ------------------------------------------------------------------------- | |
# define the ZIP file to a TXT file and then to a CSV file method | |
# ------------------------------------------------------------------------- | |
def zip2csv2csv(self, zipFile, tgtPath, csvFile, maxRecords, flushCount, unzPath, srcDelimiter=',', csvDelimiter=','):
    """Unzip an archive and convert the CSV file it contains via txt2csv.

    Only the top level of the unzip folder is searched.  When several CSV
    files are present, the first one in sorted name order is used.

    Parameters:
        zipFile      -- path of the ZIP archive (a leading '~' is expanded).
        tgtPath      -- folder for the output CSV; created when missing.
        csvFile      -- CSV path to create; when empty the lower-cased
                        name of the extracted CSV file is used.
        maxRecords   -- passed on to txt2csv.
        flushCount   -- passed on to txt2csv.
        unzPath      -- folder to unzip into; when empty a temporary folder
                        is generated and removed again afterwards.
        srcDelimiter -- field delimiter of the extracted CSV file.
        csvDelimiter -- field delimiter of the CSV output.

    Returns None.  A missing archive, or an archive without a CSV file, is
    reported on stdout.
    """
    # expand any leading tilde
    # to the user's home path
    zipFile = os.path.expanduser(zipFile)
    tgtPath = os.path.expanduser(tgtPath)
    unzPath = os.path.expanduser(unzPath)

    # verify that ZIP file exists
    if not os.path.exists(zipFile):
        print("ZIP file '%s' does NOT exist!" % zipFile)
        return

    rmvFldr = False

    # if no target folder specified,
    if tgtPath == "":
        # output an error message
        print("Target folder was NOT specified!")
    else:
        print("Target folder '%s'was specified." % tgtPath)
        # make sure the target folder exists,
        # creating it recursively if it does not
        if not os.path.exists(tgtPath):
            os.makedirs(tgtPath)

    # if UNZIP folder was NOT specified,
    # generate a temporary UNZIP folder
    # to be removed after processing
    if unzPath == "":
        unzPath = tempfile.mkdtemp()
        rmvFldr = True
        print("Unzip folder '%s' was generated." % unzPath)
    else:
        print("Unzip folder '%s' was specified." % unzPath)
        # make sure the UNZIP folder exists,
        # creating it recursively if it does not
        if not os.path.exists(unzPath):
            os.makedirs(unzPath)

    try:
        # unzip the ZIP file
        # to the UNZIP folder
        self.zip2dir(zipFile, unzPath)

        # find the CSV file in the specified UNZIP folder;
        # sorted so the pick does not depend on listing order
        tmpFile = ""
        for aFile in sorted(os.listdir(unzPath)):
            if aFile.lower().endswith(".csv"):
                tmpFile = os.path.join(unzPath, aFile)
                break

        # verify that a CSV file was found
        if tmpFile.strip() == "":
            print("Zip file '%s' did NOT contain a CSV file!" % zipFile)
            return

        print("zipFile = %s" % zipFile)
        print("srcFile = %s" % tmpFile)

        if csvFile == "":
            csvFile = os.path.join(tgtPath, os.path.basename(tmpFile).lower())
        if os.path.dirname(csvFile) == "":
            csvFile = os.path.join(tgtPath, csvFile)
        print("csvFile = %s" % csvFile)

        # create the CSV file from the SRC file
        self.txt2csv(tmpFile, tgtPath, csvFile, maxRecords, flushCount, srcDelimiter, csvDelimiter)
    finally:
        # if UNZIP folder removal specified, remove the UNZIP folder
        # in question -- also on the early return and on errors,
        # so a generated temporary folder is not left behind
        if rmvFldr:
            shutil.rmtree(unzPath)

    return
# ------------------------------------------------------------------------- | |
# define the ZIP file to a DBF file, then to a CSV file method
# ------------------------------------------------------------------------- | |
def zip2dbf2csv(self, zipFile, tgtPath, csvFile, maxRecords, flushCount, unzPath, csvDelimiter=','):
    """Unzip an archive and convert every DBF file it contains via dbf2csv.

    Only the top level of the unzip folder is searched.

    Parameters:
        zipFile      -- path of the ZIP archive (a leading '~' is expanded).
        tgtPath      -- folder for the output CSV files; created when missing.
        csvFile      -- CSV path to create.  When empty, each DBF file gets
                        its own CSV named after it ('.dbf' replaced by
                        '.csv'); when given, every DBF file is written to
                        this one path.
        maxRecords   -- passed on to dbf2csv.
        flushCount   -- passed on to dbf2csv.
        unzPath      -- folder to unzip into; when empty a temporary folder
                        is generated and removed again afterwards.
        csvDelimiter -- field delimiter of the CSV output.

    Returns None.  A missing archive, or an archive without a DBF file, is
    reported on stdout.
    """
    # expand any leading tilde
    # to the user's home path
    zipFile = os.path.expanduser(zipFile)
    tgtPath = os.path.expanduser(tgtPath)
    unzPath = os.path.expanduser(unzPath)

    # verify that ZIP file exists
    if not os.path.exists(zipFile):
        print("ZIP file '%s' does NOT exist!" % zipFile)
        return

    rmvFldr = False

    # if no target folder specified,
    if tgtPath == "":
        # output an error message
        print("Target folder was NOT specified!")
    else:
        print("Target folder '%s'was specified." % tgtPath)
        # make sure the target folder exists,
        # creating it recursively if it does not
        if not os.path.exists(tgtPath):
            os.makedirs(tgtPath)

    # if UNZIP folder was NOT specified,
    # generate a temporary UNZIP folder
    # to be removed after processing
    if unzPath == "":
        unzPath = tempfile.mkdtemp()
        rmvFldr = True
        print("Unzip folder '%s' was generated." % unzPath)
    else:
        print("Unzip folder '%s' was specified." % unzPath)
        # make sure the UNZIP folder exists,
        # creating it recursively if it does not
        if not os.path.exists(unzPath):
            os.makedirs(unzPath)

    try:
        # unzip the ZIP file
        # to the UNZIP folder
        self.zip2dir(zipFile, unzPath)

        # find the DBF file(s) in
        # the specified UNZIP folder
        foundDbf = False
        for aFile in sorted(os.listdir(unzPath)):
            if not aFile.lower().endswith(".dbf"):
                continue
            foundDbf = True
            tmpFile = os.path.join(unzPath, aFile)
            print("zipFile = %s" % zipFile)
            print("dbfFile = %s" % tmpFile)

            # derive the output name per DBF file; reusing the first
            # derived name would make later DBF files overwrite it
            outFile = csvFile
            if outFile == "":
                outFile = os.path.join(tgtPath, os.path.basename(tmpFile).lower().replace('.dbf','.csv'))
            if os.path.dirname(outFile) == "":
                outFile = os.path.join(tgtPath, outFile)
            print("csvFile = %s" % outFile)

            # create the CSV file from the DBF file
            self.dbf2csv(tmpFile, tgtPath, outFile, maxRecords, flushCount, csvDelimiter)

        # verify that a DBF file was found
        if not foundDbf:
            print("Zip file '%s' did NOT contain a DBF file!" % zipFile)
    finally:
        # if UNZIP folder removal specified, remove the UNZIP folder
        # in question -- also on errors, so a generated temporary
        # folder is not left behind
        if rmvFldr:
            shutil.rmtree(unzPath)

    return
# ------------------------------------------------------------------------- | |
# define the ZIP file to a MDB file, then to a CSV file method
# ------------------------------------------------------------------------- | |
def zip2mdb2csv(self, zipFile, tgtPath, csvFile, tblName, maxRecords, flushCount, unzPath, csvDelimiter=','):
    """Unzip an archive and export a table of every Access database in it.

    Only the top level of the unzip folder is searched.  Each .mdb / .accdb
    file found is passed to mdb2csv.

    Parameters:
        zipFile      -- path of the ZIP archive (a leading '~' is expanded).
        tgtPath      -- folder for the output CSV files; created when missing.
        csvFile      -- CSV path to create.  When empty, each database gets
                        its own CSV named after it ('.mdb' / '.accdb'
                        replaced by '.csv'); when given, every database is
                        written to this one path.
        tblName      -- name of the table to export; passed on to mdb2csv.
        maxRecords   -- passed on to mdb2csv.
        flushCount   -- passed on to mdb2csv.
        unzPath      -- folder to unzip into; when empty a temporary folder
                        is generated and removed again afterwards.
        csvDelimiter -- field delimiter of the CSV output.

    Returns None.  A missing archive, or an archive without a database
    file, is reported on stdout.
    """
    # expand any leading tilde
    # to the user's home path
    zipFile = os.path.expanduser(zipFile)
    tgtPath = os.path.expanduser(tgtPath)
    unzPath = os.path.expanduser(unzPath)

    # verify that ZIP file exists
    if not os.path.exists(zipFile):
        print("ZIP file '%s' does NOT exist!" % zipFile)
        return

    rmvFldr = False

    # if no target folder specified,
    if tgtPath == "":
        # output an error message
        print("Target folder was NOT specified!")
    else:
        print("Target folder '%s'was specified." % tgtPath)
        # make sure the target folder exists,
        # creating it recursively if it does not
        if not os.path.exists(tgtPath):
            os.makedirs(tgtPath)

    # if UNZIP folder was NOT specified,
    # generate a temporary UNZIP folder
    # to be removed after processing
    if unzPath == "":
        unzPath = tempfile.mkdtemp()
        rmvFldr = True
        print("Unzip folder '%s' was generated." % unzPath)
    else:
        print("Unzip folder '%s' was specified." % unzPath)
        # make sure the UNZIP folder exists,
        # creating it recursively if it does not
        if not os.path.exists(unzPath):
            os.makedirs(unzPath)

    try:
        # unzip the ZIP file
        # to the UNZIP folder
        self.zip2dir(zipFile, unzPath)

        # find the MDB or ACCDB file(s) in
        # the specified UNZIP folder
        foundMdb = False
        for aFile in sorted(os.listdir(unzPath)):
            if not aFile.lower().endswith((".mdb", ".accdb")):
                continue
            foundMdb = True
            tmpFile = os.path.join(unzPath, aFile)
            print("zipFile = %s" % zipFile)
            print("mdbFile = %s" % tmpFile)

            # derive the output name per database; reusing the first
            # derived name would make later databases overwrite it
            outFile = csvFile
            if outFile == "":
                outFile = os.path.join(tgtPath, os.path.basename(tmpFile).lower().replace('.mdb','.csv').replace('.accdb','.csv'))
            if os.path.dirname(outFile) == "":
                outFile = os.path.join(tgtPath, outFile)
            print("csvFile = %s" % outFile)

            # create the CSV file from the MDB file
            self.mdb2csv(tmpFile, tgtPath, outFile, tblName, maxRecords, flushCount, csvDelimiter)

        # verify that a MDB or ACCDB file was found
        if not foundMdb:
            print("Zip file '%s' did NOT contain a MDB or ACCDB file!" % zipFile)
    finally:
        # if UNZIP folder removal specified, remove the UNZIP folder
        # in question -- also on errors, so a generated temporary
        # folder is not left behind
        if rmvFldr:
            shutil.rmtree(unzPath)

    return
# ------------------------------------------------------------------------- | |
# define the ZIP file to a SHP file, then to CSV files method | |
# ------------------------------------------------------------------------- | |
def zip2shp2csv(self, zipFile, tgtPath, csvFile, maxRecords, flushCount, unzPath):
    '''
    Unzip a ZIP file, locate the first SHP file inside it and hand
    that SHP file to self.shp2csv for conversion to a CSV file.

    zipFile    -- path of the ZIP file (a leading '~' is expanded)
    tgtPath    -- target folder (a leading '~' is expanded); created
                  when it does not exist
    csvFile    -- CSV output file; when "" the name is derived from
                  the SHP file name, and a bare file name is placed
                  inside tgtPath
    maxRecords -- passed through unchanged to self.shp2csv
    flushCount -- passed through unchanged to self.shp2csv
    unzPath    -- folder to unzip into (a leading '~' is expanded);
                  when "" a temporary folder is generated and removed
                  again before returning

    Returns None; problems are reported with print().
    '''
    # expand any leading tilde
    # to the user's home path
    zipFile = os.path.expanduser(zipFile)
    tgtPath = os.path.expanduser(tgtPath)
    unzPath = os.path.expanduser(unzPath)
    # verify that ZIP file exists
    if not os.path.exists(zipFile):
        print("ZIP file '%s' does NOT exist!" % zipFile)
        return
    rmvFldr = False
    # if no target folder specified,
    if tgtPath == "":
        # output an error message
        print("Target folder was NOT specified!")
    else:
        print("Target folder '%s'was specified." % tgtPath)
        # make sure the target folder exists,
        # creating it recursively if it does not
        if not os.path.exists(tgtPath):
            os.makedirs(tgtPath)
    # if UNZIP folder was NOT specified,
    # generate a temporary UNZIP folder
    # to be removed after processing
    if unzPath == "":
        unzPath = tempfile.mkdtemp()
        rmvFldr = True
        print("Unzip folder '%s' was generated." % unzPath)
    else:
        print("Unzip folder '%s' was specified." % unzPath)
        # make sure the UNZIP folder exists,
        # creating it recursively if it does not
        if not os.path.exists(unzPath):
            os.makedirs(unzPath)
    # try/finally so that a generated UNZIP folder is removed on
    # every exit path, including the "no SHP file" early return
    try:
        # unzip the ZIP file
        # to the UNZIP folder
        self.zip2dir(zipFile, unzPath)
        # find the first SHP file in
        # the specified UNZIP folder
        tmpFile = ""
        for aFile in os.listdir(unzPath):
            if aFile.lower().endswith(".shp"):
                tmpFile = os.path.join(unzPath, aFile)
                break
        # verify that a SHP file was found
        if tmpFile == "":
            print("Zip file '%s' did NOT contain a SHP file!" % zipFile)
            return
        print("zipFile = %s" % zipFile)
        print("shpFile = %s" % tmpFile)
        if csvFile == "":
            # swap only the extension; str.replace would also
            # rewrite a '.shp' appearing elsewhere in the name
            stem = os.path.splitext(os.path.basename(tmpFile))[0]
            csvFile = os.path.join(tgtPath, stem.lower() + '.csv')
        if os.path.dirname(csvFile) == "":
            csvFile = os.path.join(tgtPath, csvFile)
        print("csvFile = %s" % csvFile)
        # create the CSV file from the SHP file
        self.shp2csv(tmpFile, tgtPath, csvFile, maxRecords, flushCount)
    finally:
        # if UNZIP folder removal specified,
        # remove the UNZIP folder in question
        if rmvFldr:
            shutil.rmtree(unzPath)
# ------------------------------------------------------------------------- | |
# define the ZIP file to a SHP file, then to KML and CSV files method | |
# ------------------------------------------------------------------------- | |
def zip2shp2kmlcsv(self, zipFile, tgtPath, kmlFile, csvFile, maxRecords, flushCount, unzPath):
    '''
    Unzip a ZIP file, locate the first SHP file inside it and hand
    that SHP file to self.shp2kmlcsv for conversion to KML and CSV.

    zipFile    -- path of the ZIP file (a leading '~' is expanded)
    tgtPath    -- target folder (a leading '~' is expanded); created
                  when it does not exist
    kmlFile    -- KML output file; when "" the name is derived from
                  the SHP file name, and a bare file name is placed
                  inside tgtPath
    csvFile    -- CSV output file; same rules as kmlFile
    maxRecords -- passed through unchanged to self.shp2kmlcsv
    flushCount -- passed through unchanged to self.shp2kmlcsv
    unzPath    -- folder to unzip into (a leading '~' is expanded);
                  when "" a temporary folder is generated and removed
                  again before returning

    Returns None; problems are reported with print().
    '''
    # expand any leading tilde
    # to the user's home path
    zipFile = os.path.expanduser(zipFile)
    tgtPath = os.path.expanduser(tgtPath)
    unzPath = os.path.expanduser(unzPath)
    # verify that ZIP file exists
    if not os.path.exists(zipFile):
        print("ZIP file '%s' does NOT exist!" % zipFile)
        return
    rmvFldr = False
    # if no target folder specified,
    if tgtPath == "":
        # output an error message
        print("Target folder was NOT specified!")
    else:
        print("Target folder '%s'was specified." % tgtPath)
        # make sure the target folder exists,
        # creating it recursively if it does not
        if not os.path.exists(tgtPath):
            os.makedirs(tgtPath)
    # if UNZIP folder was NOT specified,
    # generate a temporary UNZIP folder
    # to be removed after processing
    if unzPath == "":
        unzPath = tempfile.mkdtemp()
        rmvFldr = True
        print("Unzip folder '%s' was generated." % unzPath)
    else:
        print("Unzip folder '%s' was specified." % unzPath)
        # make sure the UNZIP folder exists,
        # creating it recursively if it does not
        # (create only when MISSING; calling os.makedirs
        # on an existing folder raises FileExistsError)
        if not os.path.exists(unzPath):
            os.makedirs(unzPath)
    # try/finally so that a generated UNZIP folder is removed on
    # every exit path, including the "no SHP file" early return
    try:
        # unzip the ZIP file
        # to the UNZIP folder
        self.zip2dir(zipFile, unzPath)
        # find the first SHP file in
        # the specified UNZIP folder
        tmpFile = ""
        for aFile in os.listdir(unzPath):
            if aFile.lower().endswith(".shp"):
                tmpFile = os.path.join(unzPath, aFile)
                break
        # verify that a SHP file was found
        if tmpFile == "":
            print("Zip file '%s' did NOT contain a SHP file!" % zipFile)
            return
        print("zipFile = %s" % zipFile)
        print("shpFile = %s" % tmpFile)
        # lower-cased SHP file name without its extension,
        # used to derive any output name not given by the caller
        stem = os.path.splitext(os.path.basename(tmpFile))[0].lower()
        if kmlFile == "":
            kmlFile = os.path.join(tgtPath, stem + '.kml')
        if os.path.dirname(kmlFile) == "":
            kmlFile = os.path.join(tgtPath, kmlFile)
        print("kmlFile = %s" % kmlFile)
        if csvFile == "":
            csvFile = os.path.join(tgtPath, stem + '.csv')
        if os.path.dirname(csvFile) == "":
            csvFile = os.path.join(tgtPath, csvFile)
        print("csvFile = %s" % csvFile)
        # create the KML and CSV files from the SHP file
        self.shp2kmlcsv(tmpFile, tgtPath, kmlFile, csvFile, maxRecords, flushCount)
    finally:
        # if UNZIP folder removal specified,
        # remove the UNZIP folder in question
        if rmvFldr:
            shutil.rmtree(unzPath)
# ------------------------------------------------------------------------- | |
# define the ZIP file to a TAB file, then to a CSV file method | |
# ------------------------------------------------------------------------- | |
def zip2tab2csv(self, zipFile, tgtPath, tabFile, csvFile, maxRecords, flushCount, unzPath, tabDelimiter='\t', csvDelimiter=','):
    '''
    Unzip a ZIP file and hand every TAB or TXT file found inside it
    to self.tab2csv for conversion to a CSV file.

    zipFile      -- path of the ZIP file (a leading '~' is expanded)
    tgtPath      -- target folder (a leading '~' is expanded);
                    created when it does not exist
    tabFile      -- accepted for interface compatibility; this
                    method never reads it
    csvFile      -- CSV output file; when "" each TAB/TXT file gets
                    a CSV name derived from its own name, and a bare
                    file name is placed inside tgtPath
    maxRecords   -- passed through unchanged to self.tab2csv
    flushCount   -- passed through unchanged to self.tab2csv
    unzPath      -- folder to unzip into (a leading '~' is
                    expanded); when "" a temporary folder is
                    generated and removed again before returning
    tabDelimiter -- passed through unchanged to self.tab2csv
    csvDelimiter -- passed through unchanged to self.tab2csv

    Returns None; problems are reported with print().
    '''
    # expand any leading tilde
    # to the user's home path
    zipFile = os.path.expanduser(zipFile)
    tgtPath = os.path.expanduser(tgtPath)
    unzPath = os.path.expanduser(unzPath)
    # verify that ZIP file exists
    if not os.path.exists(zipFile):
        print("ZIP file '%s' does NOT exist!" % zipFile)
        return
    rmvFldr = False
    # if no target folder specified,
    if tgtPath == "":
        # output an error message
        print("Target folder was NOT specified!")
    else:
        print("Target folder '%s'was specified." % tgtPath)
        # make sure the target folder exists,
        # creating it recursively if it does not
        if not os.path.exists(tgtPath):
            os.makedirs(tgtPath)
    # if UNZIP folder was NOT specified,
    # generate a temporary UNZIP folder
    # to be removed after processing
    if unzPath == "":
        unzPath = tempfile.mkdtemp()
        rmvFldr = True
        print("Unzip folder '%s' was generated." % unzPath)
    else:
        print("Unzip folder '%s' was specified." % unzPath)
        # make sure the UNZIP folder exists,
        # creating it recursively if it does not
        if not os.path.exists(unzPath):
            os.makedirs(unzPath)
    # try/finally so that a generated UNZIP folder
    # is removed even when the conversion raises
    try:
        # unzip the ZIP file
        # to the UNZIP folder
        self.zip2dir(zipFile, unzPath)
        # find the TAB or TXT files in the specified UNZIP
        # folder (sorted so the processing order is stable)
        tmpFile = ""
        for aFile in sorted(os.listdir(unzPath)):
            if not aFile.lower().endswith((".tab", ".txt")):
                continue
            tmpFile = os.path.join(unzPath, aFile)
            print("zipFile = %s" % zipFile)
            print("tabFile = %s" % tmpFile)
            # work out the output path per input file; reusing
            # one derived name for every file would make each
            # later file overwrite the CSV of the first one
            if csvFile == "":
                outFile = os.path.join(tgtPath, os.path.splitext(aFile)[0].lower() + '.csv')
            elif os.path.dirname(csvFile) == "":
                outFile = os.path.join(tgtPath, csvFile)
            else:
                outFile = csvFile
            print("csvFile = %s" % outFile)
            # create the CSV file from the TAB file
            self.tab2csv(tmpFile, tgtPath, outFile, maxRecords, flushCount, tabDelimiter, csvDelimiter)
        # verify that a TAB or TXT file was found
        if tmpFile == "":
            print("Zip file '%s' did NOT contain a TAB or TXT file!" % zipFile)
    finally:
        # if UNZIP folder removal specified,
        # remove the UNZIP folder in question
        if rmvFldr:
            shutil.rmtree(unzPath)
# ------------------------------------------------------------------------- | |
# define the ZIP file to a TXT file and then to a CSV file method | |
# ------------------------------------------------------------------------- | |
def zip2txt2csv(self, zipFile, tgtPath, csvFile, maxRecords, flushCount, unzPath, txtDelimiter=',', csvDelimiter=','):
    '''
    Unzip a ZIP file, locate the first TXT file inside it and hand
    that TXT file to self.txt2csv for conversion to a CSV file.

    zipFile      -- path of the ZIP file (a leading '~' is expanded)
    tgtPath      -- target folder (a leading '~' is expanded);
                    created when it does not exist
    csvFile      -- CSV output file; when "" the name is derived
                    from the TXT file name, and a bare file name is
                    placed inside tgtPath
    maxRecords   -- passed through unchanged to self.txt2csv
    flushCount   -- passed through unchanged to self.txt2csv
    unzPath      -- folder to unzip into (a leading '~' is
                    expanded); when "" a temporary folder is
                    generated and removed again before returning
    txtDelimiter -- passed through unchanged to self.txt2csv
    csvDelimiter -- passed through unchanged to self.txt2csv

    Returns None; problems are reported with print().
    '''
    # expand any leading tilde
    # to the user's home path
    zipFile = os.path.expanduser(zipFile)
    tgtPath = os.path.expanduser(tgtPath)
    unzPath = os.path.expanduser(unzPath)
    # verify that ZIP file exists
    if not os.path.exists(zipFile):
        print("ZIP file '%s' does NOT exist!" % zipFile)
        return
    rmvFldr = False
    # if no target folder specified,
    if tgtPath == "":
        # output an error message
        print("Target folder was NOT specified!")
    else:
        print("Target folder '%s'was specified." % tgtPath)
        # make sure the target folder exists,
        # creating it recursively if it does not
        if not os.path.exists(tgtPath):
            os.makedirs(tgtPath)
    # if UNZIP folder was NOT specified,
    # generate a temporary UNZIP folder
    # to be removed after processing
    if unzPath == "":
        unzPath = tempfile.mkdtemp()
        rmvFldr = True
        print("Unzip folder '%s' was generated." % unzPath)
    else:
        print("Unzip folder '%s' was specified." % unzPath)
        # make sure the UNZIP folder exists,
        # creating it recursively if it does not
        if not os.path.exists(unzPath):
            os.makedirs(unzPath)
    # try/finally so that a generated UNZIP folder is removed on
    # every exit path, including the "no TXT file" early return
    try:
        # unzip the ZIP file
        # to the UNZIP folder
        self.zip2dir(zipFile, unzPath)
        # find the first TXT file in
        # the specified UNZIP folder
        tmpFile = ""
        for aFile in os.listdir(unzPath):
            if aFile.lower().endswith(".txt"):
                tmpFile = os.path.join(unzPath, aFile)
                break
        # verify that a TXT file was found
        if tmpFile == "":
            print("Zip file '%s' did NOT contain a TXT file!" % zipFile)
            return
        print("zipFile = %s" % zipFile)
        print("txtFile = %s" % tmpFile)
        if csvFile == "":
            # swap only the extension; str.replace would also
            # rewrite a '.txt' appearing elsewhere in the name
            stem = os.path.splitext(os.path.basename(tmpFile))[0]
            csvFile = os.path.join(tgtPath, stem.lower() + '.csv')
        if os.path.dirname(csvFile) == "":
            csvFile = os.path.join(tgtPath, csvFile)
        print("csvFile = %s" % csvFile)
        # create the CSV file from the TXT file
        self.txt2csv(tmpFile, tgtPath, csvFile, maxRecords, flushCount, txtDelimiter, csvDelimiter)
    finally:
        # if UNZIP folder removal specified,
        # remove the UNZIP folder in question
        if rmvFldr:
            shutil.rmtree(unzPath)
# ------------------------------------------------------------------------- | |
# define the ZIP file to a XLS file, then to a CSV file method | |
# ------------------------------------------------------------------------- | |
def zip2xls2csv(self, zipFile, tgtPath, csvFile, maxRecords, flushCount, unzPath):
    '''
    Unzip a ZIP file and hand every XLS or XLSX file found inside it
    to self.xls2csv for conversion to a CSV file.

    zipFile    -- path of the ZIP file (a leading '~' is expanded)
    tgtPath    -- target folder (a leading '~' is expanded); created
                  when it does not exist
    csvFile    -- CSV output file; when "" each XLS/XLSX file gets a
                  CSV name derived from its own name, and a bare
                  file name is placed inside tgtPath
    maxRecords -- passed through unchanged to self.xls2csv
    flushCount -- passed through unchanged to self.xls2csv
    unzPath    -- folder to unzip into (a leading '~' is expanded);
                  when "" a temporary folder is generated and removed
                  again before returning

    Returns None; problems are reported with print().
    '''
    # expand any leading tilde
    # to the user's home path
    zipFile = os.path.expanduser(zipFile)
    tgtPath = os.path.expanduser(tgtPath)
    unzPath = os.path.expanduser(unzPath)
    # verify that ZIP file exists
    if not os.path.exists(zipFile):
        print("ZIP file '%s' does NOT exist!" % zipFile)
        return
    rmvFldr = False
    # if no target folder specified,
    if tgtPath == "":
        # output an error message
        print("Target folder was NOT specified!")
    else:
        print("Target folder '%s'was specified." % tgtPath)
        # make sure the target folder exists,
        # creating it recursively if it does not
        if not os.path.exists(tgtPath):
            os.makedirs(tgtPath)
    # if UNZIP folder was NOT specified,
    # generate a temporary UNZIP folder
    # to be removed after processing
    if unzPath == "":
        unzPath = tempfile.mkdtemp()
        rmvFldr = True
        print("Unzip folder '%s' was generated." % unzPath)
    else:
        print("Unzip folder '%s' was specified." % unzPath)
        # make sure the UNZIP folder exists,
        # creating it recursively if it does not
        if not os.path.exists(unzPath):
            os.makedirs(unzPath)
    # try/finally so that a generated UNZIP folder
    # is removed even when the conversion raises
    try:
        # unzip the ZIP file
        # to the UNZIP folder
        self.zip2dir(zipFile, unzPath)
        # find the XLS or XLSX files in the specified UNZIP
        # folder (sorted so the processing order is stable)
        tmpFile = ""
        for aFile in sorted(os.listdir(unzPath)):
            if not aFile.lower().endswith((".xls", ".xlsx")):
                continue
            tmpFile = os.path.join(unzPath, aFile)
            print("zipFile = %s" % zipFile)
            print("xlsFile = %s" % tmpFile)
            # work out the output path per input file; reusing
            # one derived name for every file would make each
            # later file overwrite the CSV of the first one
            if csvFile == "":
                # splitext rather than chained str.replace, so
                # that 'book.xlsx' becomes 'book.csv' instead
                # of 'book.csvx'
                outFile = os.path.join(tgtPath, os.path.splitext(aFile)[0].lower() + '.csv')
            elif os.path.dirname(csvFile) == "":
                outFile = os.path.join(tgtPath, csvFile)
            else:
                outFile = csvFile
            print("csvFile = %s" % outFile)
            # create the CSV file from the XLS file
            self.xls2csv(tmpFile, tgtPath, outFile, maxRecords, flushCount)
        # verify that a XLS or XLSX file was found
        if tmpFile == "":
            print("Zip file '%s' did NOT contain a XLS file!" % zipFile)
    finally:
        # if UNZIP folder removal specified,
        # remove the UNZIP folder in question
        if rmvFldr:
            shutil.rmtree(unzPath)
This is very useful! Have you looked into adding Texas?
nate1206: please forgive the late response. Texas has a lot of its stuff behind what is essentially a "paywall" of sorts.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Within the PyFrackETL.py script, add which states you'd like to download to the 'states' array towards the top of the 'main' routine. A valid list of states can be found as the keys in the 'listStates' dictionary a few lines down in the source code.