Skip to content

Instantly share code, notes, and snippets.

@emraher
Forked from KhepryQuixote/PyFrackETL.py
Created July 12, 2016 09:59
Show Gist options
  • Save emraher/e8711b0b3d962404a8072eb05c83252d to your computer and use it in GitHub Desktop.
Save emraher/e8711b0b3d962404a8072eb05c83252d to your computer and use it in GitHub Desktop.
Python scripts to download, extract, and output oil, gas, and/or fracking well information from various states in the United States of America.
# -*- coding: utf-8 -*-
'''
This Python script is oriented towards oil, gas, and fracking well
information download, extract, and output at the state-level within
the United States of America. Not every state is covered: some states do
not allow fracking, and others have no site from which data can be downloaded.
This Python script is dependent upon another Python script entitled
Src2Tgt.py, which is also available as a Gist under the same name.
'''
import ftplib
import os
import socket
import urllib
import requests
from Src2Tgt import *
from urllib.request import urlretrieve
# global variables (main() passes all four straight through to process_files())
downloadFiles = False  # when True, process_files() fetches each source file
unpackFiles = True  # when True, process_files() converts/copies the downloaded files
maxRecords = 0  # row cap handed to the converters; the visible ones treat 0 as "no limit"
flushCount = 1000  # the visible converters report progress every flushCount rows
# =============================================================================
# Main routine:
# =============================================================================
def main():
    """Run the download/unpack pipeline for the hard-coded list of states.

    Every selected state code that has an entry in the state-name table is
    announced on stdout and handed to process_files() together with the
    module-level downloadFiles / unpackFiles / maxRecords / flushCount
    settings. Codes without an entry in the table are skipped.
    """
    # States that are still troublesome and therefore not selected:
    #   'OH' - the FTP site is either not active or refuses connections.
    #   'PA' - the site answers with a 302 redirect. Switching from "urllib"
    #          to "requests" got past the redirect, but the landing page then
    #          requires pressing an "Agree" button before serving the data:
    #          <input type="submit" name="ctl00$MainContent$AgreeButton"
    #                 value="Agree" id="MainContent_AgreeButton" />
    #          Submitting that form programmatically is still to be done.
    # The state(s) actually processed by this run:
    selectedStates = ['NY']

    dataRoot = "/home/temp/fracking/data"
    downloadRoot = os.path.join(dataRoot, "download")
    unpackedRoot = os.path.join(dataRoot, "unpacked")

    # two-letter code -> display name, for every state this script knows about
    stateNames = {
        'AK': 'Alaska',
        'AL': 'Alabama',
        'AR': 'Arkansas',
        'CA': 'California',
        'CO': 'Colorado',
        'FL': 'Florida',
        'ID': 'Idaho',
        'KS': 'Kansas',
        'LA': 'Louisiana',
        'MI': 'Michigan',
        'MO': 'Missouri',
        'MT': 'Montana',
        'ND': 'North Dakota',
        'NE': 'Nebraska',
        'NY': 'New York',
        'OH': 'Ohio',
        'OK': 'Oklahoma',
        'OR': 'Oregon',
        'PA': 'Pennsylvania',
        'UT': 'Utah',
        'VA': 'Virginia',
        'WA': 'Washington',
        'WV': 'West Virginia',
    }

    for code in selectedStates:
        if code not in stateNames:
            continue
        print ('--------------------')
        print ('%s (%s) process' % (stateNames[code], code))
        process_files(downloadRoot, unpackedRoot, code,
                      downloadFiles, unpackFiles, maxRecords, flushCount)
# =============================================================================
# Well information downloads and processing, state-by-state:
# =============================================================================
# Shorthand "unpack" specs used by the per-state table below. Each spec is
# (method, csvFile, extraArgs); csvFile None means "derive the CSV name from
# tgtFile" and "" means "let the converter name the CSV after each input file".
_SHP = ('zip2shp2csv', None, ())
_XLS = ('xls2csv', None, ())
_COPY = ('copy', None, ())

# Per-state work list. Each step is (tgtFile, fetch, unpack):
#   fetch  - ('url', url)      -> urllib_download
#            ('requests', url) -> requests_download
#            ('ftp', host, userName, passWord, ftpPath, ftpFile) -> ftp_download
#   unpack - None when nothing can be unpacked yet, _COPY to copy the file
#            as-is, otherwise (Src2Tgt method name, csvFile, extra arguments)
_STATE_STEPS = {
    'AK': [
        ("AOGCC_DataExtract.zip",
         ('url', "http://aogweb.state.ak.us/Data_Extract/AOGCC_DataExtract.zip"),
         ('zip2mdb2csv', None, ('tblWellMaster',))),
    ],
    'AL': [
        ("wells.zip",
         ('url', "http://www.ogb.state.al.us/ogb/ogb_gisdata/wells.zip"), _SHP),
        ("bhl_loc.zip",
         ('url', "http://www.ogb.state.al.us/ogb/ogb_gisdata/bhl_loc.zip"), _SHP),
        ("bhl_lines.zip",
         ('url', "http://www.ogb.state.al.us/ogb/ogb_gisdata/bhl_lines.zip"), _SHP),
    ],
    # TODO: the Arkansas archives need to be processed as kml2csv
    'AR': [
        ("Natural_Gas_and_Oil_Wells.zip",
         ('url', "http://aogc2.state.ar.us/GIS_GOOGLE/Natural_Gas_and_Oil_Wells.zip"), None),
        ("Drilling_Fluid_Disposal_Sites.zip",
         ('url', "http://aogc2.state.ar.us/GIS_GOOGLE/Drilling_Fluid_Disposal_Sites.zip"), None),
    ],
    'CA': [
        ("AllWells_Excel.zip",
         ('url', "ftp://ftp.consrv.ca.gov/pub/oil/GIS/ExcelTables/AllWells.zip"),
         ('zip2xls2csv', "AllWells_Excel.csv", ())),
        ("AllWells_Shape.zip",
         ('url', "ftp://ftp.consrv.ca.gov/pub/oil/GIS/Shapefiles/AllWells.zip"),
         ('zip2shp2csv', "AllWells_Shape.csv", ())),
    ],
    'CO': [
        ("WELL_SHP.ZIP", ('url', "http://cogcc.state.co.us/Downloads/WELL_SHP.ZIP"), _SHP),
        ("DIR_BHLS.ZIP", ('url', "http://cogcc.state.co.us/Downloads/DIR_BHLS.ZIP"), _SHP),
        ("DIRLINES.ZIP", ('url', "http://cogcc.state.co.us/Downloads/DIRLINES.ZIP"), _SHP),
    ],
    'FL': [
        ("og-permit-database.xlsx",
         ('url', "http://www.dep.state.fl.us/water/mines/oil_gas/docs/og-permit-database.xlsx"),
         _XLS),
        ("pre_permit_oil_gas_wells.xls",
         ('url', "http://www.dep.state.fl.us/water/mines/oil_gas/docs/pre_permit_oil_gas_wells.xls"),
         _XLS),
    ],
    # TODO: the Idaho lat/longs will need adjusting, as they are in the
    #       Idaho Transverse Mercator (IDTM) projection.
    'ID': [
        ("AllPermittedWells.zip",
         ('url', "http://www.idwr.idaho.gov/ftp/gisdata/Spatial/Wells/WellConstruction/wells.zip"),
         _SHP),
        ("InjectionWells.zip",
         ('url', "http://www.idwr.idaho.gov/ftp/gisdata/Spatial/Wells/UndergroundInjectionControl/InjectionWells.zip"),
         _SHP),
    ],
    'KS': [
        ("ks_wells.zip",
         ('url', "http://www.kgs.ku.edu/PRS/Ora_Archive/ks_wells.zip"),
         ('zip2txt2csv', "", ())),
        ("OILGAS_WELLS_GEO27.zip",
         ('url', "http://www.kgs.ku.edu/PRS/Ora_Archive/OILGAS_WELLS_GEO27.zip"), _SHP),
    ],
    'LA': [
        ("wells.zip",
         ('url', "http://sonris-www.dnr.state.la.us/gis/agsweb/arcgisserver/arcgisoutput/extData/shp/wells.zip"),
         _SHP),
        ("BOTTOM_HOLE.zip",
         ('url', "http://sonris-www.dnr.state.la.us/gis/agsweb/arcgisserver/arcgisoutput/extData/shp/BOTTOM_HOLE.zip"),
         _SHP),
        ("BOTTOM_HOLE_LINE.zip",
         ('url', "http://sonris-www.dnr.state.la.us/gis/agsweb/arcgisserver/arcgisoutput/extData/shp/BOTTOM_HOLE_LINE.zip"),
         _SHP),
        ("WATER_WELL_REG.zip",
         ('url', "http://sonris-www.dnr.state.la.us/gis/agsweb/arcgisserver/arcgisoutput/extData/shp/WATER_WELL_REG.zip"),
         _SHP),
    ],
    'MI': [
        ("ogs-oilandgas-well-locations.zip",
         ('ftp', "ftp.deq.state.mi.us", "GeoWebFace", "Geology(1)",
          "geowebface/DownLoads", "ogs-oilandgas-well-locations.zip"),
         ('zip2mdb2csv', None, ('well locations',))),
        ("ogs-oilandgas-injection-data_278433_7.zip",
         ('url', "http://www.michigan.gov/documents/deq/ogs-oilandgas-injection-data_278433_7.zip"),
         ('zip2mdb2csv', None, ('Injection Wells Data',))),
    ],
    'MO': [
        ("permits-6-5-2014.xls",
         ('url', "http://www.dnr.mo.gov/geology/geosrv/docs/permits6-5-2014.xls"), _XLS),
    ],
    # TODO: will have to figure out how to obtain the Montana data automatically!
    'MT': [
        ("WellSurfaceLongLat.aspx",
         ('url', "http://www.bogc.dnrc.mt.gov/WebApps/DataMiner/Wells/WellSurfaceLongLat.aspx"),
         None),
        ("WellBLSLongLat.aspx",
         ('url', "http://www.bogc.dnrc.mt.gov/WebApps/DataMiner/Wells/WellBLSLongLat.aspx"),
         None),
    ],
    'ND': [
        ("Wells.zip", ('url', "https://www.dmr.nd.gov/output/ShapeFiles/Wells.zip"), _SHP),
    ],
    'NE': [
        ("NebraskaWellData.zip",
         ('url', "http://www.nogcc.ne.gov/Publications/NebraskaWellData.zip"),
         ('zip2mdb2csv', None, ('tblNebraskaWellData',))),
        # TODO: might be nice to tweak zip2shp2csv to compress multiple
        #       spaces down to one space
        ("NE_WELLS.zip",
         ('url', "http://www.nogcc.ne.gov/Publications/NE_WELLS.zip"), _SHP),
        ("NebraskaWellStatus_TypeCodes.pdf",
         ('url', "http://www.nogcc.ne.gov/Publications/NebraskaWellStatus_TypeCodes.pdf"),
         _COPY),
    ],
    'NY': [
        ("wellDOS.zip",
         ('url', "ftp://ftp.dec.state.ny.us/dmn/zip/wellDOS.zip"),
         ('zip2csv2csv', "", ())),
    ],
    # TODO: the Ohio FTP site appears to be no longer available
    'OH': [
        ("Setup.exe",
         ('ftp', "ftp.dnr.state.oh.us", "", "", "OilGas/Upload/RBDMS", "Setup.exe"), None),
        ("Rdbmsd97.exe",
         ('ftp', "ftp.dnr.state.oh.us", "", "", "OilGas/Upload/RBDMS", "Rdbmsd97.exe"), None),
    ],
    'OK': [
        ("W27BASE.ZIP",
         ('url', "ftp://ftp.occ.state.ok.us/OG_DATA/W27BASE.ZIP"),
         ('zip2txt2csv', "", ('|', ','))),
        ("W97OPER.ZIP",
         ('url', "ftp://ftp.occ.state.ok.us/OG_DATA/W97OPER.ZIP"),
         ('zip2txt2csv', "", ('|', ','))),
        ("CONTENTS.XLS",
         ('url', "ftp://ftp.occ.state.ok.us/OG_DATA/CONTENTS.XLS"), _COPY),
    ],
    'OR': [
        ("OG_Permits_06-25-2013.xlsx",
         ('url', "http://www.oregongeology.org/mlrr/spreadsheets/OG_Permits_06-25-2013.xlsx"),
         _XLS),
    ],
    # TODO: Pennsylvania may have to be done manually until the "Agree"
    #       button on the redirected form can be submitted programmatically
    'PA': [
        ("Conventional_Wells_2013_01-12.csv",
         ('requests', "https://www.paoilandgasreporting.state.pa.us/publicreports/Modules/DataExports/ExportProductionData.aspx?PERIOD_ID=2013-0"),
         None),
        ("Unconventional_Wells_2013_01-06.csv",
         ('requests', "https://www.paoilandgasreporting.state.pa.us/publicreports/Modules/DataExports/ExportProductionData.aspx?PERIOD_ID=2013-1"),
         None),
        ("Unconventional_Wells_2013_07-12.csv",
         ('requests', "https://www.paoilandgasreporting.state.pa.us/publicreports/Modules/DataExports/ExportProductionData.aspx?PERIOD_ID=2013-2"),
         None),
    ],
    # TODO: Utah needs an exe2dbf2csv routine before its file can be unpacked
    'UT': [
        ("welldata.exe",
         ('url', "https://oilgas.ogm.utah.gov/pub/Database/welldata.exe"), None),
    ],
    # TODO: will need to write a pdf2csv routine
    'VA': [
        ("DgoWellLocation.pdf",
         ('url', "http://www.dmme.virginia.gov/dgoinquiry/frmPrint.aspx?Form=DgoWellLocation"),
         ('pdf2csv', None, ())),
    ],
    # TODO: the Washington archive is awkward to work with; maybe ogr2ogr
    #       can handle it. Come up with a gdb2csv routine.
    'WA': [
        ("ger_portal_oil_gas_wells.zip",
         ('url', "http://www.dnr.wa.gov/Publications/ger_portal_oil_gas_wells.zip"), None),
    ],
    # TODO: the first column of the West Virginia sheets has no column name;
    #       visual inspection shows it is just the row number, so it can be
    #       overlooked for the time being
    'WV': [
        ("WellLocationPart1(10-31-2013).zip",
         ('url', "http://www.dep.wv.gov/oil-and-gas/databaseinfo/Documents/WellLocationPart1(10-31-2013).zip"),
         ('zip2xls2csv', "", ())),
        ("WellLocationPart2(10-31-2013).zip",
         ('url', "http://www.dep.wv.gov/oil-and-gas/databaseinfo/Documents/WellLocationPart2(10-31-2013).zip"),
         ('zip2xls2csv', "", ())),
    ],
}


def _fetch_file(fetch, dwnPath, tgtFile):
    """Download one table entry into dwnPath/tgtFile with the routine it names."""
    kind = fetch[0]
    if kind == 'ftp':
        host, userName, passWord, ftpPath, ftpFile = fetch[1:]
        ftp_download(host, userName, passWord, ftpPath, ftpFile, dwnPath, tgtFile)
    elif kind == 'requests':
        requests_download(fetch[1], dwnPath, tgtFile)
    else:
        urllib_download(fetch[1], dwnPath, tgtFile)


def _unpack_file(src2tgt, srcFile, unpPath, tgtFile, unpack,
                 maxRecords, flushCount, unzPath):
    """Convert (or copy) one downloaded file into unpPath as the table entry asks."""
    method, csvFile, extra = unpack
    if method == 'copy':
        # imported here so this module no longer depends on shutil leaking
        # in through "from Src2Tgt import *"
        import shutil
        shutil.copy2(srcFile, os.path.join(unpPath, tgtFile))
        return
    if csvFile is None:
        # same name as the download, lower-cased, with a .csv extension
        csvFile = os.path.splitext(tgtFile.lower())[0] + '.csv'
    convert = getattr(src2tgt, method)
    if method == 'zip2mdb2csv':
        # the MDB converter takes the table name ahead of the record limits
        convert(srcFile, unpPath, csvFile, extra[0], maxRecords, flushCount, unzPath)
    elif method in ('xls2csv', 'pdf2csv'):
        # single-file converters take no unzip path
        convert(srcFile, unpPath, csvFile, maxRecords, flushCount)
    else:
        convert(srcFile, unpPath, csvFile, maxRecords, flushCount, unzPath, *extra)


def process_files(dwnPath, unpPath, state, downloadFiles, unpackFiles, maxRecords, flushCount):
    """Download and/or unpack every well-data file known for one state.

    Parameters:
        dwnPath       - root download folder; files go to dwnPath/<state>
        unpPath       - root output folder; results go to unpPath/<state>
        state         - two-letter state code; a code with no table entry
                        only gets its two folders created
        downloadFiles - when true, fetch each file before unpacking it
        unpackFiles   - when true, convert/copy each file that has an
                        unpack step defined
        maxRecords    - passed through to the Src2Tgt converters
        flushCount    - passed through to the Src2Tgt converters

    For each file the download (if requested) happens before the unpack
    (if requested), and files are handled in table order. Returns None.
    """
    unzPath = ""
    dwnPath = os.path.join(dwnPath, state)
    unpPath = os.path.join(unpPath, state)
    for folder in (dwnPath, unpPath):
        os.makedirs(folder, exist_ok=True)

    src2tgt = None
    for tgtFile, fetch, unpack in _STATE_STEPS.get(state, ()):
        if downloadFiles and fetch is not None:
            _fetch_file(fetch, dwnPath, tgtFile)
        if unpackFiles and unpack is not None:
            if src2tgt is None:
                # the converter is only needed once something is unpacked
                src2tgt = Src2Tgt()
            _unpack_file(src2tgt, os.path.join(dwnPath, tgtFile), unpPath,
                         tgtFile, unpack, maxRecords, flushCount, unzPath)
# =============================================================================
# FTP File Processing:
# Pull a specified file (i.e. download the file)
# =============================================================================
def ftp_download(url,
                 userName,
                 passWord,
                 ftpPath,
                 ftpFile,
                 tgtPath,
                 tgtFile):
    """Download one file from an FTP server.

    Parameters:
        url      - FTP host name (no scheme), e.g. "ftp.example.gov"
        userName - login name; an empty string requests an anonymous login
        passWord - password used with userName
        ftpPath  - remote directory to change into
        ftpFile  - remote file name to retrieve
        tgtPath  - local folder to write into
        tgtFile  - local file name

    Returns True when the file was downloaded, False when connecting,
    logging in, changing directory or retrieving failed (the reason is
    printed). A file left behind by a failed retrieval is removed, and the
    connection is always closed once it has been opened.
    """
    print ("==========================")
    print ("Pulling File from FTP Site")
    print ("File: %s" % '/'.join([url, ftpPath, ftpFile]))
    ftp = ftplib.FTP()
    try:
        ftp.connect(url)
    except socket.error as e:
        print ('ERROR: unable to connect to "%s"' % url)
        print ('ERROR: FTP connection error "%s"' % e)
        # nothing was opened, so there is nothing to quit/close
        return False
    try:
        try:
            if userName != "":
                ftp.login(userName, passWord)
            else:
                # login anonymously
                ftp.login()
        except ftplib.error_perm as e:
            if userName != "":
                print ('ERROR: cannot login (bad user name or password) to "%s"' % url)
            else:
                print ('ERROR: cannot login (anonymous login forbidden) to "%s"' % url)
            print ('ERROR: FTP login error "%s"' % e)
            return False
        try:
            ftp.cwd(ftpPath)
        except ftplib.error_perm as e:
            print ('ERROR: cannot CWD to "%s"' % ftpPath)
            print ('ERROR: FTP CWD error "%s"' % e)
            return False
        # build the download target file path
        tgtFilePath = os.path.join(tgtPath, tgtFile)
        print ("Download file to: %s" % tgtFilePath)
        try:
            with open(tgtFilePath, 'wb') as fOut:
                ftp.retrbinary("RETR %s" % ftpFile, fOut.write)
        except ftplib.error_perm as e:
            print ('ERROR: cannot read file "%s"' % ftpFile)
            print ('ERROR: FTP RETR (binary) error "%s"' % e)
            # do not leave an empty/partial file for the unpack step to find
            if os.path.exists(tgtFilePath):
                os.remove(tgtFilePath)
            return False
        # success!
        print ('SUCCESS: downloaded "%s" to "%s"' % (ftpFile, tgtFilePath))
        return True
    finally:
        try:
            ftp.quit()
        except ftplib.all_errors:
            # the server may already be gone; just drop the socket
            ftp.close()
# =============================================================================
# URL download routine (urllib; used for http://, https:// and ftp:// URLs):
# =============================================================================
def urllib_download(url, tgtPath, tgtFile):
    """Download url to tgtPath/tgtFile with urllib's urlretrieve.

    Parameters:
        url     - source URL (callers pass http://, https:// and ftp:// URLs)
        tgtPath - existing local folder to write into
        tgtFile - local file name

    Best-effort: when tgtPath is missing or the transfer fails, the problem
    is printed and the function returns without raising. Returns None.
    """
    tgtFullPath = os.path.abspath(os.path.join(tgtPath, tgtFile))
    if not os.path.exists(tgtPath):
        print ("urllib_download, tgtPath does not exist '%s'" % tgtPath)
        return
    print ("urllib_download, download start '%s'" % url)
    try:
        urlretrieve(url, tgtFullPath)
    except OSError as e:
        # urllib's URLError/HTTPError and socket errors all derive from OSError
        print ("urllib_download, download failed '%s' (%s)" % (url, e))
        return
    print ("urllib_download, download ended '%s'" % url)
    return
# =============================================================================
# HTTP download routine (requests library):
# =============================================================================
def requests_download(url, tgtPath, tgtFile):
    """Download url to tgtPath/tgtFile using the requests library.

    Parameters:
        url     - source URL
        tgtPath - existing local folder to write into
        tgtFile - local file name

    The body is streamed to disk in 8 KiB chunks. Nothing is written when
    the server answers with an HTTP error status. Best-effort: a missing
    tgtPath or a failed request is printed and the function returns
    without raising. Returns None.
    """
    tgtFullPath = os.path.abspath(os.path.join(tgtPath, tgtFile))
    if not os.path.exists(tgtPath):
        print ("requests_download, tgtPath does not exist '%s'" % tgtPath)
        return
    print ("requests_download, download start '%s'" % url)
    r = None
    try:
        r = requests.get(url, stream=True)
        # refuse to save a 4xx/5xx error page as if it were the data file
        r.raise_for_status()
        with open(tgtFullPath, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
    except requests.exceptions.RequestException as e:
        print ("requests_download, download failed '%s' (%s)" % (url, e))
        return
    finally:
        if r is not None:
            # a streamed response holds its connection until closed
            r.close()
    print ("requests_download, download ended '%s'" % url)
    return
# =============================================================================
# Test routine:
# =============================================================================
def test_downloads():
    """Manual smoke test: fetch one sample file via FTP, then via urllib.

    Downloads ncvoter89.zip from alt.ncsbe.gov into /home/temp twice, once
    with ftp_download() and once with urllib_download() using an ftp:// URL.
    Needs network access; not called by main().
    """
    tgtPath = '/home/temp'
    ftpUrl = 'alt.ncsbe.gov'
    ftpUsr = 'anonymous'
    ftpPwd = ''
    ftpPath = 'data'
    ftpFile = 'ncvoter89.zip'
    tgtFile = 'ncvoter89.zip'
    if not os.path.exists(tgtPath):
        # NOTE(review): this used to call mkdir_recursively(), which is not
        # defined in the visible source; os.makedirs is what the rest of the
        # file uses to create folders recursively.
        os.makedirs(tgtPath)
    ftp_download(ftpUrl, ftpUsr, ftpPwd, ftpPath, ftpFile, tgtPath, tgtFile)
    httpUrl = 'ftp://alt.ncsbe.gov/data/ncvoter89.zip'
    urllib_download(httpUrl, tgtPath, tgtFile)
# ============================================================================
# execute the mainline processing routine
# ============================================================================
# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    retval = main()
# -*- coding: utf-8 -*-
'''
This Python script is oriented towards oil, gas, and fracking well
information download, extract, and output at the state-level within
the United States of America. Not every state is covered: some states do
not allow fracking, and others have no site from which data can be downloaded.
This Python script is depended upon by another Python script entitled
PyFrackETL.py, which is also available as a Gist under the same name.
'''
import collections
import csv
import ogr
import os
import pyodbc
import shutil
import tempfile
import time
import xlrd
import zipfile
# from dbfpy import dbf
class Src2Tgt:
# Lookup table for the Windows-1252 (cp1252) characters in the 0x80-0x9F
# range: each key is the Unicode character, each value is the one-character
# string whose code point equals the cp1252 byte for that character.
# The five bytes cp1252 leaves unassigned (0x81, 0x8D, 0x8F, 0x90, 0x9D)
# have no entry.
# NOTE(review): nothing in the visible part of the class reads this table;
# presumably a method further down uses it - confirm.
cp1252 = {
    # from http://www.microsoft.com/typography/unicode/1252.htm
    u"\u20AC": u"\x80", # EURO SIGN
    u"\u201A": u"\x82", # SINGLE LOW-9 QUOTATION MARK
    u"\u0192": u"\x83", # LATIN SMALL LETTER F WITH HOOK
    u"\u201E": u"\x84", # DOUBLE LOW-9 QUOTATION MARK
    u"\u2026": u"\x85", # HORIZONTAL ELLIPSIS
    u"\u2020": u"\x86", # DAGGER
    u"\u2021": u"\x87", # DOUBLE DAGGER
    u"\u02C6": u"\x88", # MODIFIER LETTER CIRCUMFLEX ACCENT
    u"\u2030": u"\x89", # PER MILLE SIGN
    u"\u0160": u"\x8A", # LATIN CAPITAL LETTER S WITH CARON
    u"\u2039": u"\x8B", # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
    u"\u0152": u"\x8C", # LATIN CAPITAL LIGATURE OE
    u"\u017D": u"\x8E", # LATIN CAPITAL LETTER Z WITH CARON
    u"\u2018": u"\x91", # LEFT SINGLE QUOTATION MARK
    u"\u2019": u"\x92", # RIGHT SINGLE QUOTATION MARK
    u"\u201C": u"\x93", # LEFT DOUBLE QUOTATION MARK
    u"\u201D": u"\x94", # RIGHT DOUBLE QUOTATION MARK
    u"\u2022": u"\x95", # BULLET
    u"\u2013": u"\x96", # EN DASH
    u"\u2014": u"\x97", # EM DASH
    u"\u02DC": u"\x98", # SMALL TILDE
    u"\u2122": u"\x99", # TRADE MARK SIGN
    u"\u0161": u"\x9A", # LATIN SMALL LETTER S WITH CARON
    u"\u203A": u"\x9B", # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
    u"\u0153": u"\x9C", # LATIN SMALL LIGATURE OE
    u"\u017E": u"\x9E", # LATIN SMALL LETTER Z WITH CARON
    u"\u0178": u"\x9F", # LATIN CAPITAL LETTER Y WITH DIAERESIS
}
def __init__(self):
return
# -------------------------------------------------------------------------
# define the CSV to KML file method
# -------------------------------------------------------------------------
def csv2kml(self, csvFile, tgtPath, kmlFile, maxRecords, flushCount):
    """Convert a CSV file into a KML file through OGR.

    Parameters:
        csvFile    - source CSV; the first row holds the column names and
                     the last column of every row holds the geometry text
        tgtPath    - folder used when kmlFile is empty or has no folder part
        kmlFile    - target KML file; "" derives the name from csvFile
        maxRecords - stop after this many rows when greater than zero
        flushCount - print a progress line every flushCount rows
                     (zero or less disables progress lines)

    Prints a message and returns when csvFile is missing or empty.
    Returns None.
    """
    print ("")
    print ("========================")
    print ("CSV to KML conversion...")
    print ("------------------------")
    ogr.UseExceptions()
    # expand any leading tilde
    # to the user's home path
    csvFile = os.path.expanduser(csvFile)
    kmlFile = os.path.expanduser(kmlFile)
    # verify that CSV file exists
    if not os.path.exists(csvFile):
        print ("CSV file '%s' does NOT exist!" % csvFile)
        return
    print ("csvFile = %s" % csvFile)
    if kmlFile == "":
        kmlFile = os.path.join(tgtPath, os.path.basename(csvFile).lower().replace('.csv','.kml').replace('.txt','.kml'))
    if os.path.dirname(kmlFile) == "":
        kmlFile = os.path.join(tgtPath, kmlFile)
    # make sure the target folder exists (also when the KML name was
    # derived from the CSV name), creating it recursively if it does not
    tgtFldr = os.path.dirname(kmlFile)
    if tgtFldr != "" and not os.path.exists(tgtFldr):
        os.makedirs(tgtFldr)
    print ("kmlFile = %s" % kmlFile)

    def report(rowCount, startTime):
        # guard against a zero interval on coarse clocks
        elapsed = time.time() - startTime
        rcdsPerSec = rowCount / elapsed if elapsed > 0 else 0
        print ("Rows: {:,} @ {:,.0f} records/second".format(rowCount, rcdsPerSec))

    # newline='' is what the csv module asks for when reading files
    with open(csvFile, 'r', newline='') as fReader:
        csvreader = csv.reader(fReader)
        headers = next(csvreader, None)
        if headers is None:
            print ("CSV file '%s' is empty!" % csvFile)
            return
        kmlDs = ogr.GetDriverByName('KML').CreateDataSource(kmlFile)
        kmlLyr = kmlDs.CreateLayer(os.path.splitext(os.path.basename(kmlFile))[0])
        for field in headers[:-1]: #skip kmlgeometry (assumed to be in last column)
            kmlLyr.CreateField(ogr.FieldDefn(field))
        bgnTime = time.time()
        rows = 0
        for rec in csvreader:
            if not rec:
                # csv.reader yields [] for blank lines; there is no geometry to read
                continue
            feat = ogr.Feature(kmlLyr.GetLayerDefn())
            for i, field in enumerate(headers[:-1]): #skip kmlgeometry
                feat.SetField(field, rec[i])
            # NOTE(review): the geometry column is parsed as GML - confirm
            # that this matches what the CSV producer writes
            feat.SetGeometry(ogr.CreateGeometryFromGML(rec[-1]))
            kmlLyr.CreateFeature(feat)
            rows += 1
            if flushCount > 0 and rows % flushCount == 0:
                report(rows, bgnTime)
            if maxRecords > 0 and rows >= maxRecords:
                break
        #clean up: release the OGR layer and data source handles
        del kmlLyr, kmlDs
    print ("-----------------------------------")
    report(rows, bgnTime)
    print ("")
    return
# -------------------------------------------------------------------------
# define the DBF file to CSV file method
# -------------------------------------------------------------------------
def dbf2csv(self, dbfFile, tgtPath, csvFile, maxRecords, flushCount, csvDelimiter=','):
    """Convert a DBF file into a delimited text (CSV) file.

    Parameters:
        dbfFile      - source DBF file
        tgtPath      - folder used when csvFile is empty or has no folder part
        csvFile      - target CSV file; "" derives the name from dbfFile
        maxRecords   - stop after this many rows when greater than zero
        flushCount   - flush the output and print a progress line every
                       flushCount rows (zero or less disables this)
        csvDelimiter - field delimiter written to the CSV

    Prints a message and returns when dbfFile is missing or when the
    'dbfpy' package cannot be imported. Returns None.
    """
    print("")
    print("=============================")
    print("DBF to CSV file conversion...")
    print("-----------------------------")
    print("dbfFile '%s'" % dbfFile)
    print("csvFile '%s'" % csvFile)
    # expand any leading tilde
    # to the user's home path
    dbfFile = os.path.expanduser(dbfFile)
    csvFile = os.path.expanduser(csvFile)
    # verify that DBF file exists
    if not os.path.exists(dbfFile):
        print ("DBF file '%s' does NOT exist!" % dbfFile)
        return
    # the module-level "from dbfpy import dbf" is commented out, so the
    # reader is imported here and its absence reported instead of crashing
    try:
        from dbfpy import dbf
    except ImportError:
        print ("DBF to CSV conversion requires the 'dbfpy' package!")
        return
    if csvFile == "":
        csvFile = os.path.join(tgtPath, os.path.basename(dbfFile).lower().replace('.dbf','.csv'))
    if os.path.dirname(csvFile) == "":
        csvFile = os.path.join(tgtPath, csvFile)
    # make sure the CSV target folder exists (also when the CSV name was
    # derived from the DBF name), creating it recursively if it does not
    tgtFldr = os.path.dirname(csvFile)
    if tgtFldr != "" and not os.path.exists(tgtFldr):
        os.makedirs(tgtFldr)
    print ("csvFile = %s" % csvFile)

    def report(rowCount, startTime):
        # guard against a zero interval on coarse clocks
        elapsed = time.time() - startTime
        rcdsPerSec = rowCount / elapsed if elapsed > 0 else 0
        print ("Rows: {:,} @ {:,.0f} records/second".format(rowCount, rcdsPerSec))

    dbffile = dbf.Dbf(dbfFile)
    try:
        # header row: the field names, not the field-definition objects
        # NOTE(review): assumes dbfpy field definitions expose ".name" -
        # confirm; the definition itself is written if they do not
        colNames = [getattr(fld, 'name', fld) for fld in dbffile.header.fields]
        with open(csvFile, 'w', newline='') as fWriter:
            csvwriter = csv.writer(fWriter, delimiter=csvDelimiter)
            csvwriter.writerow(colNames)
            bgnTime = time.time()
            rows = 0
            for row in dbffile:
                # NOTE(review): relies on dbfpy records being iterable in
                # field order - confirm
                csvwriter.writerow(row)
                rows += 1
                if flushCount > 0 and rows % flushCount == 0:
                    fWriter.flush()
                    report(rows, bgnTime)
                if maxRecords > 0 and rows >= maxRecords:
                    break
    finally:
        #clean up
        dbffile.close()
    print ("-----------------------------------")
    report(rows, bgnTime)
    print ("")
    return
# -------------------------------------------------------------------------
# define the KML to CSV file method
# -------------------------------------------------------------------------
def kml2csv(self, kmlFile, tgtPath, csvFile, maxRecords, flushCount):
    """Placeholder for a KML to CSV conversion.

    Not implemented: every argument is ignored and None is returned.
    """
    return None
# -------------------------------------------------------------------------
# define the MDB to CSV file method
# -------------------------------------------------------------------------
def mdb2csv(self, mdbFile, tgtPath, csvFile, tblName, maxRecords, flushCount, csvDelimiter=','):
    """Export one table of a Microsoft Access database to a CSV file.

    Parameters:
        mdbFile      -- path of the MDB/ACCDB file to read ('~' is expanded)
        tgtPath      -- folder used when csvFile is empty or has no folder part
        csvFile      -- CSV file to write; when empty, the lower-cased database
                        base name with a '.csv' extension is used
        tblName      -- name of the table to export
        maxRecords   -- stop after this many rows; 0 or less means no limit
        flushCount   -- flush the output and report progress every flushCount rows
        csvDelimiter -- field delimiter of the CSV output

    Returns None. Prints a message and returns early when the database
    file does not exist. Connects through pyodbc using the Microsoft
    Access ODBC driver.
    """
    print("")
    print("=============================")
    print("MDB to CSV file conversion...")
    print("-----------------------------")
    print("mdbFile '%s'" % mdbFile)
    print("csvFile '%s'" % csvFile)
    print("tblName '%s'" % tblName)
    # expand any leading tilde
    # to the user's home path
    mdbFile = os.path.expanduser(mdbFile)
    csvFile = os.path.expanduser(csvFile)
    # verify that MDB file exists
    if not os.path.exists(mdbFile):
        print ("MDB file '%s' does NOT exist!" % mdbFile)
        return
    # resolve the final CSV path first, so that the folder which is
    # created below is always the folder that is actually written to
    if csvFile == "":
        csvFile = os.path.splitext(os.path.basename(mdbFile).lower())[0] + '.csv'
    if os.path.dirname(csvFile) == "":
        csvFile = os.path.join(tgtPath, csvFile)
    print ("csvFile = %s" % csvFile)
    # make sure the CSV target folder exists,
    # creating it recursively if it does not
    tgtFldr = os.path.dirname(csvFile)
    if tgtFldr:
        os.makedirs(tgtFldr, exist_ok=True)

    def _report(count):
        # a coarse clock can yield a zero elapsed time; avoid dividing by it
        elapsed = time.time() - bgnTime
        rate = count / elapsed if elapsed > 0 else 0.0
        print ("Rows: {:,} @ {:,.0f} records/second".format(count, rate))

    bgnTime = time.time()
    mdbFile = os.path.abspath(mdbFile)
    # the legacy driver only understands *.mdb files;
    # *.accdb files need the newer combined driver
    if mdbFile.lower().endswith('.accdb'):
        driver = "Microsoft Access Driver (*.mdb, *.accdb)"
    else:
        driver = "Microsoft Access Driver (*.mdb)"
    connString = "Driver={%s};DBQ=%s;" % (driver, mdbFile)
    print ("pyODBC connString: %s" % connString)
    rows = 0
    conn = pyodbc.connect(connString)
    try:
        cursor = conn.cursor()
        # the table name cannot be passed as a query parameter,
        # so it is bracket-quoted into the statement instead
        cursor.execute("select * from [%s]" % tblName)
        # take the header from the result set itself so that the
        # column names always line up with the row values
        colNames = [col[0] for col in cursor.description]
        with open(csvFile, 'w', newline='') as fWriter:
            csvwriter = csv.writer(fWriter, delimiter=csvDelimiter)
            csvwriter.writerow(colNames)
            for row in cursor:
                csvwriter.writerow(row)
                rows += 1
                if flushCount > 0 and rows % flushCount == 0:
                    fWriter.flush()
                    _report(rows)
                if maxRecords > 0 and rows >= maxRecords:
                    break
    finally:
        # clean up, even when the export fails part-way
        conn.close()
    print ("-----------------------------------")
    _report(rows)
    print ("")
    return
# -------------------------------------------------------------------------
# define the PDF to CSV file method
# -------------------------------------------------------------------------
def pdf2csv(self, pdfFile, tgtPath, csvFile, maxRecords, flushCount):
    """Placeholder for a PDF to CSV conversion.

    Not implemented: every argument is ignored and None is returned.
    """
    return None
# -------------------------------------------------------------------------
# define the SHP to CSV file method
# -------------------------------------------------------------------------
def shp2csv(self, shpFile, tgtPath, csvFile, maxRecords, flushCount):
    """Convert the first layer of a shapefile into a CSV file.

    Each feature becomes one CSV row holding its attribute fields plus a
    final 'kmlgeometry' column with the geometry exported as KML text
    (empty when the feature has no geometry).

    Parameters:
        shpFile    -- path of the '.shp' file to read ('~' is expanded)
        tgtPath    -- folder used when csvFile is empty or has no folder part
        csvFile    -- CSV file to write; when empty, the lower-cased shapefile
                      base name with a '.csv' extension is used
        maxRecords -- stop after this many features; 0 or less means no limit
        flushCount -- flush the output and report progress every flushCount rows

    Returns None. Prints a message and returns early when the shapefile
    does not exist, is not named '*.shp', or cannot be opened by OGR.
    """
    print ("")
    print ("========================")
    print ("SHP to CSV conversion...")
    print ("------------------------")
    # expand any leading tilde
    # to the user's home path
    shpFile = os.path.expanduser(shpFile)
    csvFile = os.path.expanduser(csvFile)
    # verify that SHP file exists
    if not os.path.exists(shpFile):
        print ("SHP file '%s' does NOT exist!" % shpFile)
        return
    elif not shpFile.lower().endswith(".shp"):
        print ("SHP file name '%s' does NOT end with '.shp'!" % shpFile)
        return
    print ("shpFile = %s" % shpFile)
    # resolve the final CSV path first, so that the folder which is
    # created below is always the folder that is actually written to
    if csvFile == "":
        csvFile = os.path.splitext(os.path.basename(shpFile).lower())[0] + '.csv'
    if os.path.dirname(csvFile) == "":
        csvFile = os.path.join(tgtPath, csvFile)
    print ("csvFile = %s" % csvFile)
    # make sure the target folder exists,
    # creating it recursively if it does not
    tgtFldr = os.path.dirname(csvFile)
    if tgtFldr:
        os.makedirs(tgtFldr, exist_ok=True)
    # open the source before creating any output;
    # ogr.Open() signals failure by returning None
    shpDs = ogr.Open(shpFile)
    if shpDs is None:
        print ("SHP file '%s' could NOT be opened!" % shpFile)
        return
    shpLayer = shpDs.GetLayer()
    # Get field names
    shpDfn = shpLayer.GetLayerDefn()
    headers = [shpDfn.GetFieldDefn(i).GetName() for i in range(shpDfn.GetFieldCount())]
    headers.append('kmlgeometry')

    def _report(count):
        # a coarse clock can yield a zero elapsed time; avoid dividing by it
        elapsed = time.time() - bgnTime
        rate = count / elapsed if elapsed > 0 else 0.0
        print ("Rows: {:,} @ {:,.0f} records/second".format(count, rate))

    bgnTime = time.time()
    rows = 0
    try:
        with open(csvFile, 'w', newline='') as csvfile:
            csvwriter = csv.DictWriter(csvfile, headers)
            csvwriter.writeheader()
            # Write attributes and kml geometry out to csv
            for shpFeat in shpLayer:
                try:
                    attributes = shpFeat.items()
                    shpGeom = shpFeat.GetGeometryRef()
                    # features without a geometry get an empty column
                    attributes['kmlgeometry'] = "" if shpGeom is None else shpGeom.ExportToKML()
                    csvwriter.writerow(attributes)
                except UnicodeEncodeError as e:
                    # best effort: report the row which the output
                    # encoding cannot represent and carry on
                    print (e)
                rows += 1
                if flushCount > 0 and rows % flushCount == 0:
                    csvfile.flush()
                    _report(rows)
                if maxRecords > 0 and rows >= maxRecords:
                    break
    finally:
        # clean up: dropping the references releases the OGR data source
        shpLayer = None
        shpDs = None
    print ("-----------------------------------")
    _report(rows)
    print ("")
    return
# -------------------------------------------------------------------------
# define the SHP to KML file method
# -------------------------------------------------------------------------
def shp2kml(self, shpFile, tgtPath, kmlFile, maxRecords, flushCount):
    """Convert the first layer of a shapefile into a KML file.

    Every attribute field of the shapefile is re-created in the KML layer
    (using OGR's default field type) and each feature is copied across
    together with its geometry.

    Parameters:
        shpFile    -- path of the '.shp' file to read ('~' is expanded)
        tgtPath    -- folder used when kmlFile is empty or has no folder part
        kmlFile    -- KML file to write; when empty, the lower-cased shapefile
                      base name with a '.kml' extension is used
        maxRecords -- stop after this many features; 0 or less means no limit
        flushCount -- report progress every flushCount features

    Returns None. Prints a message and returns early when the shapefile
    does not exist, is not named '*.shp', or cannot be opened by OGR.
    """
    print ("")
    print ("========================")
    print ("SHP to KML conversion...")
    print ("------------------------")
    # expand any leading tilde
    # to the user's home path
    shpFile = os.path.expanduser(shpFile)
    kmlFile = os.path.expanduser(kmlFile)
    # verify that SHP file exists
    if not os.path.exists(shpFile):
        print ("SHP file '%s' does NOT exist!" % shpFile)
        return
    elif not shpFile.lower().endswith(".shp"):
        print ("SHP file name '%s' does NOT end with '.shp'!" % shpFile)
        return
    print ("shpFile = %s" % shpFile)
    # resolve the final KML path first, so that the folder which is
    # created below is always the folder that is actually written to
    if kmlFile == "":
        kmlFile = os.path.splitext(os.path.basename(shpFile).lower())[0] + '.kml'
    if os.path.dirname(kmlFile) == "":
        kmlFile = os.path.join(tgtPath, kmlFile)
    print ("kmlFile = %s" % kmlFile)
    # make sure the KML target folder exists,
    # creating it recursively if it does not
    tgtFldr = os.path.dirname(kmlFile)
    if tgtFldr:
        os.makedirs(tgtFldr, exist_ok=True)
    # Open files; ogr.Open() signals failure by returning None
    shpDs = ogr.Open(shpFile)
    if shpDs is None:
        print ("SHP file '%s' could NOT be opened!" % shpFile)
        return
    shpLayer = shpDs.GetLayer()
    kmlDs = ogr.GetDriverByName('KML').CreateDataSource(kmlFile)
    kmlLayer = kmlDs.CreateLayer(os.path.splitext(os.path.basename(kmlFile))[0])

    def _report(count):
        # a coarse clock can yield a zero elapsed time; avoid dividing by it
        elapsed = time.time() - bgnTime
        rate = count / elapsed if elapsed > 0 else 0.0
        print ("Rows: {:,} @ {:,.0f} records/second".format(count, rate))

    bgnTime = time.time()
    rows = 0
    try:
        # Get field names and mirror them in the KML layer
        shpDfn = shpLayer.GetLayerDefn()
        fields = [shpDfn.GetFieldDefn(i).GetName() for i in range(shpDfn.GetFieldCount())]
        for field in fields:
            kmlLayer.CreateField(ogr.FieldDefn(field))
        # Copy attributes and geometry of each feature into the KML layer
        for shpFeat in shpLayer:
            attributes = shpFeat.items()
            shpGeom = shpFeat.GetGeometryRef()
            kmlFeat = ogr.Feature(kmlLayer.GetLayerDefn())
            for field in fields:
                kmlFeat.SetField(field, attributes[field])
            # SetGeometry() stores a copy, so the source geometry can be
            # handed over directly; features without geometry are kept
            if shpGeom is not None:
                kmlFeat.SetGeometry(shpGeom)
            kmlLayer.CreateFeature(kmlFeat)
            rows += 1
            if flushCount > 0 and rows % flushCount == 0:
                _report(rows)
            if maxRecords > 0 and rows >= maxRecords:
                break
    finally:
        # clean up: dropping the references makes OGR
        # flush and close both data sources
        shpLayer = None
        shpDs = None
        kmlLayer = None
        kmlDs = None
    print ("-----------------------------------")
    _report(rows)
    print ("")
    return
# -------------------------------------------------------------------------
# define the SHP to KML and CSV files method
# -------------------------------------------------------------------------
def shp2kmlcsv(self, shpFile, tgtPath, kmlFile, csvFile, maxRecords, flushCount):
    """Convert the first layer of a shapefile into a KML file and a CSV file.

    The CSV file receives one row per feature holding its attribute
    fields plus a final 'kmlgeometry' column with the geometry exported
    as KML text (empty when the feature has no geometry). The KML file
    receives the same features with their attributes and geometry.

    Parameters:
        shpFile    -- path of the '.shp' file to read ('~' is expanded)
        tgtPath    -- folder used when an output name is empty or has no folder part
        kmlFile    -- KML file to write; when empty, the lower-cased shapefile
                      base name with a '.kml' extension is used
        csvFile    -- CSV file to write; when empty, the lower-cased shapefile
                      base name with a '.csv' extension is used
        maxRecords -- stop after this many features; 0 or less means no limit
        flushCount -- flush the CSV and report progress every flushCount rows

    Returns None. Prints a message and returns early when the shapefile
    does not exist, is not named '*.shp', or cannot be opened by OGR.
    """
    print ("")
    print ("================================")
    print ("SHP to KML and CSV conversion...")
    print ("--------------------------------")
    # expand any leading tilde
    # to the user's home path
    shpFile = os.path.expanduser(shpFile)
    kmlFile = os.path.expanduser(kmlFile)
    csvFile = os.path.expanduser(csvFile)
    # verify that SHP file exists
    if not os.path.exists(shpFile):
        print ("SHP file '%s' does NOT exist!" % shpFile)
        return
    elif not shpFile.lower().endswith(".shp"):
        print ("SHP file name '%s' does NOT end with '.shp'!" % shpFile)
        return
    print ("shpFile = %s" % shpFile)
    # resolve the final output paths first, so that the folders which are
    # created below are always the folders that are actually written to
    baseName = os.path.splitext(os.path.basename(shpFile).lower())[0]
    if kmlFile == "":
        kmlFile = baseName + '.kml'
    if os.path.dirname(kmlFile) == "":
        kmlFile = os.path.join(tgtPath, kmlFile)
    print ("kmlFile = %s" % kmlFile)
    if csvFile == "":
        csvFile = baseName + '.csv'
    if os.path.dirname(csvFile) == "":
        csvFile = os.path.join(tgtPath, csvFile)
    print ("csvFile = %s" % csvFile)
    # make sure the KML and CSV target folders exist,
    # creating them recursively if they do not
    for tgtFldr in (os.path.dirname(kmlFile), os.path.dirname(csvFile)):
        if tgtFldr:
            os.makedirs(tgtFldr, exist_ok=True)
    # Open files; ogr.Open() signals failure by returning None
    shpDs = ogr.Open(shpFile)
    if shpDs is None:
        print ("SHP file '%s' could NOT be opened!" % shpFile)
        return
    shpLayer = shpDs.GetLayer()
    kmlDs = ogr.GetDriverByName('KML').CreateDataSource(kmlFile)
    kmlLayer = kmlDs.CreateLayer(os.path.splitext(os.path.basename(kmlFile))[0])

    def _report(count):
        # a coarse clock can yield a zero elapsed time; avoid dividing by it
        elapsed = time.time() - bgnTime
        rate = count / elapsed if elapsed > 0 else 0.0
        print ("Rows: {:,} @ {:,.0f} records/second".format(count, rate))

    bgnTime = time.time()
    rows = 0
    try:
        # Get field names and mirror them in the KML layer
        shpDfn = shpLayer.GetLayerDefn()
        fields = [shpDfn.GetFieldDefn(i).GetName() for i in range(shpDfn.GetFieldCount())]
        for field in fields:
            kmlLayer.CreateField(ogr.FieldDefn(field))
        with open(csvFile, 'w', newline='') as csvfile:
            csvwriter = csv.DictWriter(csvfile, fields + ['kmlgeometry'])
            csvwriter.writeheader()
            # Write attributes and kml out to csv and to the KML layer
            for shpFeat in shpLayer:
                attributes = shpFeat.items()
                shpGeom = shpFeat.GetGeometryRef()
                # features without a geometry get an empty column
                attributes['kmlgeometry'] = "" if shpGeom is None else shpGeom.ExportToKML()
                csvwriter.writerow(attributes)
                kmlFeat = ogr.Feature(kmlLayer.GetLayerDefn())
                for field in fields:
                    kmlFeat.SetField(field, attributes[field])
                # SetGeometry() stores a copy, so the source
                # geometry can be handed over directly
                if shpGeom is not None:
                    kmlFeat.SetGeometry(shpGeom)
                kmlLayer.CreateFeature(kmlFeat)
                rows += 1
                if flushCount > 0 and rows % flushCount == 0:
                    csvfile.flush()
                    _report(rows)
                if maxRecords > 0 and rows >= maxRecords:
                    break
    finally:
        # clean up: dropping the references makes OGR
        # flush and close both data sources
        shpLayer = None
        shpDs = None
        kmlLayer = None
        kmlDs = None
    print ("-----------------------------------")
    _report(rows)
    print ("")
    return
# -------------------------------------------------------------------------
# define the TAB-delimited file to CSV file method
# -------------------------------------------------------------------------
def tab2csv(self, tabFile, tgtPath, csvFile, maxRecords, flushCount, tabDelimiter='\t', csvDelimiter=','):
    """Convert a TAB-delimited text file (with a header row) into a CSV file.

    Every value is trimmed and runs of whitespace inside it are collapsed
    to a single space. Cells missing from short rows are written empty.

    Parameters:
        tabFile      -- path of the delimited file to read ('~' is expanded)
        tgtPath      -- folder used when csvFile is empty or has no folder part
        csvFile      -- CSV file to write; when empty, the lower-cased source
                        base name with a '.csv' extension is used
        maxRecords   -- stop after this many rows; 0 or less means no limit
        flushCount   -- flush the output and report progress every flushCount rows
        tabDelimiter -- field delimiter of the source file
        csvDelimiter -- field delimiter of the CSV output

    Returns None. Prints a message and returns early when the source
    file does not exist or contains no header row.
    """
    print("")
    print("=============================")
    print("TAB to CSV file conversion...")
    print("-----------------------------")
    print("tabFile '%s'" % tabFile)
    print("csvFile '%s'" % csvFile)
    # expand any leading tilde
    # to the user's home path
    tabFile = os.path.expanduser(tabFile)
    csvFile = os.path.expanduser(csvFile)
    # verify that TAB file exists
    if not os.path.exists(tabFile):
        print ("TAB file '%s' does NOT exist!" % tabFile)
        return
    # resolve the final CSV path first, so that the folder which is
    # created below is always the folder that is actually written to
    if csvFile == "":
        csvFile = os.path.splitext(os.path.basename(tabFile).lower())[0] + '.csv'
    if os.path.dirname(csvFile) == "":
        csvFile = os.path.join(tgtPath, csvFile)
    print ("csvFile = %s" % csvFile)

    def _report(count):
        # a coarse clock can yield a zero elapsed time; avoid dividing by it
        elapsed = time.time() - bgnTime
        rate = count / elapsed if elapsed > 0 else 0.0
        print ("Rows: {:,} @ {:,.0f} records/second".format(count, rate))

    bgnTime = time.time()
    rows = 0
    # newline='' lets the csv module do its own line-ending handling
    with open(tabFile, 'r', newline='') as fReader:
        csvreader = csv.DictReader(fReader, delimiter=tabDelimiter)
        colNames = csvreader.fieldnames
        # fieldnames is None when there is no header row to read
        if colNames is None:
            print ("TAB file '%s' is empty!" % tabFile)
            return
        # make sure the CSV target folder exists,
        # creating it recursively if it does not
        tgtFldr = os.path.dirname(csvFile)
        if tgtFldr:
            os.makedirs(tgtFldr, exist_ok=True)
        with open(csvFile, 'w', newline='') as fWriter:
            csvwriter = csv.DictWriter(fWriter, delimiter=csvDelimiter, fieldnames=colNames)
            csvwriter.writeheader()
            for rowDict in csvreader:
                # DictReader pads short rows with None; treat those as empty
                cols = {colName: ' '.join((rowDict[colName] or '').split())
                        for colName in colNames}
                csvwriter.writerow(cols)
                rows += 1
                if flushCount > 0 and rows % flushCount == 0:
                    fWriter.flush()
                    _report(rows)
                if maxRecords > 0 and rows >= maxRecords:
                    break
    print ("-----------------------------------")
    _report(rows)
    print ("")
    return
# -------------------------------------------------------------------------
# define the TXT file to CSV file method
# -------------------------------------------------------------------------
def txt2csv(self, txtFile, tgtPath, csvFile, maxRecords, flushCount, txtDelimiter=',', csvDelimiter=','):
    """Convert a delimited TXT file (with a header row) into a CSV file.

    Every value is trimmed and runs of whitespace inside it are collapsed
    to a single space. Cells missing from short rows are written empty.

    Parameters:
        txtFile      -- path of the delimited file to read ('~' is expanded)
        tgtPath      -- folder used when csvFile is empty or has no folder part
        csvFile      -- CSV file to write; when empty, the lower-cased source
                        base name with a '.csv' extension is used
        maxRecords   -- stop after this many rows; 0 or less means no limit
        flushCount   -- flush the output and report progress every flushCount rows
        txtDelimiter -- field delimiter of the source file
        csvDelimiter -- field delimiter of the CSV output

    Returns None. Prints a message and returns early when the source
    file does not exist or contains no header row.
    """
    print("")
    print("=============================")
    print("TXT to CSV file conversion...")
    print("-----------------------------")
    print("txtFile '%s'" % txtFile)
    print("csvFile '%s'" % csvFile)
    # expand any leading tilde
    # to the user's home path
    txtFile = os.path.expanduser(txtFile)
    csvFile = os.path.expanduser(csvFile)
    # verify that TXT file exists
    if not os.path.exists(txtFile):
        print ("TXT file '%s' does NOT exist!" % txtFile)
        return
    # resolve the final CSV path first, so that the folder which is
    # created below is always the folder that is actually written to
    if csvFile == "":
        csvFile = os.path.splitext(os.path.basename(txtFile).lower())[0] + '.csv'
    if os.path.dirname(csvFile) == "":
        csvFile = os.path.join(tgtPath, csvFile)
    print ("csvFile = %s" % csvFile)

    def _report(count):
        # a coarse clock can yield a zero elapsed time; avoid dividing by it
        elapsed = time.time() - bgnTime
        rate = count / elapsed if elapsed > 0 else 0.0
        print ("Rows: {:,} @ {:,.0f} records/second".format(count, rate))

    bgnTime = time.time()
    rows = 0
    # newline='' lets the csv module do its own line-ending handling
    with open(txtFile, 'r', newline='') as fReader:
        csvreader = csv.DictReader(fReader, delimiter=txtDelimiter)
        colNames = csvreader.fieldnames
        # fieldnames is None when there is no header row to read
        if colNames is None:
            print ("TXT file '%s' is empty!" % txtFile)
            return
        # make sure the CSV target folder exists,
        # creating it recursively if it does not
        tgtFldr = os.path.dirname(csvFile)
        if tgtFldr:
            os.makedirs(tgtFldr, exist_ok=True)
        with open(csvFile, 'w', newline='') as fWriter:
            csvwriter = csv.DictWriter(fWriter, delimiter=csvDelimiter, fieldnames=colNames)
            csvwriter.writeheader()
            for rowDict in csvreader:
                # DictReader pads short rows with None; treat those as empty
                cols = {colName: ' '.join((rowDict[colName] or '').split())
                        for colName in colNames}
                csvwriter.writerow(cols)
                rows += 1
                if flushCount > 0 and rows % flushCount == 0:
                    fWriter.flush()
                    _report(rows)
                if maxRecords > 0 and rows >= maxRecords:
                    break
    print ("-----------------------------------")
    _report(rows)
    print ("")
    return
# -------------------------------------------------------------------------
# define the XLS to CSV file method
# -------------------------------------------------------------------------
def xls2csv(self, xlsFile, tgtPath, csvFile, maxRecords, flushCount):
    """Convert the first worksheet of an Excel workbook into a CSV file.

    Every cell value is converted with str() and stripped of leading and
    trailing whitespace before being written.

    Parameters:
        xlsFile    -- path of the workbook to read ('~' is expanded)
        tgtPath    -- folder used when csvFile is empty or has no folder part
        csvFile    -- CSV file to write; when empty, the lower-cased workbook
                      base name with a '.csv' extension is used
        maxRecords -- stop after this many rows; 0 or less means no limit
        flushCount -- flush the output and report progress every flushCount rows

    Returns None. Prints a message and returns early when the workbook
    does not exist.
    """
    print("")
    print("=============================")
    print("XLS to CSV file conversion...")
    print("-----------------------------")
    print("xlsFile '%s'" % xlsFile)
    print("csvFile '%s'" % csvFile)
    # expand any leading tilde
    # to the user's home path
    xlsFile = os.path.expanduser(xlsFile)
    csvFile = os.path.expanduser(csvFile)
    # verify that XLS file exists
    if not os.path.exists(xlsFile):
        print ("XLS file '%s' does NOT exist!" % xlsFile)
        return
    # resolve the final CSV path first, so that the folder which is
    # created below is always the folder that is actually written to;
    # splitext also copes with extensions such as '.xlsx'
    if csvFile == "":
        csvFile = os.path.splitext(os.path.basename(xlsFile).lower())[0] + '.csv'
    if os.path.dirname(csvFile) == "":
        csvFile = os.path.join(tgtPath, csvFile)
    print ("csvFile = %s" % csvFile)
    # make sure the CSV target folder exists,
    # creating it recursively if it does not
    tgtFldr = os.path.dirname(csvFile)
    if tgtFldr:
        os.makedirs(tgtFldr, exist_ok=True)

    def _report(count):
        # a coarse clock can yield a zero elapsed time; avoid dividing by it
        elapsed = time.time() - bgnTime
        rate = count / elapsed if elapsed > 0 else 0.0
        print ("Rows: {:,} @ {:,.0f} records/second".format(count, rate))

    xlsWB = xlrd.open_workbook(xlsFile)
    xlsSH = xlsWB.sheet_by_index(0)
    bgnTime = time.time()
    rows = 0
    with open(csvFile, 'w', newline='') as fWriter:
        csvwriter = csv.writer(fWriter, delimiter=',')
        for rowNbr in range(xlsSH.nrows):
            values = [str(entry).strip() for entry in xlsSH.row_values(rowNbr)]
            try:
                csvwriter.writerow(values)
            except UnicodeEncodeError as e:
                # best effort: report the row which the output
                # encoding cannot represent and carry on
                print (e)
            rows += 1
            if flushCount > 0 and rows % flushCount == 0:
                fWriter.flush()
                _report(rows)
            if maxRecords > 0 and rows >= maxRecords:
                break
    print ("-----------------------------------")
    _report(rows)
    print ("")
    return
# -------------------------------------------------------------------------
# define the unZIP a file to a target folder method
# -------------------------------------------------------------------------
def zip2dir(self, zipFile, tgtPath):
    """Extract every member of a ZIP archive into a target folder.

    Parameters:
        zipFile -- path of the ZIP archive to read ('~' is expanded)
        tgtPath -- folder to extract into ('~' is expanded); it is
                   created, including parents, when it does not exist

    Returns None. Prints a message and returns early when the archive
    does not exist.
    """
    banner = (
        "",
        "=================================",
        "ZIP to TGT directory expansion...",
        "---------------------------------",
        "zipFile '%s'" % zipFile,
        "tgtPath '%s'" % tgtPath,
    )
    for text in banner:
        print(text)
    # resolve a leading '~' in either path
    archivePath = os.path.expanduser(zipFile)
    destination = os.path.expanduser(tgtPath)
    # nothing to do without an archive
    if not os.path.exists(archivePath):
        print ("ZIP file '%s' does NOT exist!" % archivePath)
        return
    # build the destination folder tree when it is missing
    if not os.path.exists(destination):
        os.makedirs(destination)
    # unpack the complete archive content into the destination
    archive = zipfile.ZipFile(archivePath, "r")
    try:
        archive.extractall(destination)
    finally:
        archive.close()
    return
# -------------------------------------------------------------------------
# define the ZIP file to a CSV file and then to a (cleaned) CSV file method
# -------------------------------------------------------------------------
def zip2csv2csv(self, zipFile, tgtPath, csvFile, maxRecords, flushCount, unzPath, srcDelimiter=',', csvDelimiter=','):
    """Unzip an archive and convert the CSV file inside it into a cleaned CSV file.

    The archive is extracted with self.zip2dir() and the alphabetically
    first '*.csv' entry at the top level of the unzip folder is converted
    with self.txt2csv().

    Parameters:
        zipFile      -- path of the ZIP archive ('~' is expanded)
        tgtPath      -- folder for the output ('~' is expanded)
        csvFile      -- CSV file to write; when empty, the lower-cased name
                        of the CSV file found in the archive is used
        maxRecords   -- passed through to txt2csv()
        flushCount   -- passed through to txt2csv()
        unzPath      -- folder to unzip into; when empty, a temporary folder
                        is generated and removed again afterwards
        srcDelimiter -- field delimiter of the CSV file inside the archive
        csvDelimiter -- field delimiter of the CSV output

    Returns None. Prints a message and returns early when the archive
    does not exist or contains no CSV file.
    """
    # expand any leading tilde
    # to the user's home path
    zipFile = os.path.expanduser(zipFile)
    tgtPath = os.path.expanduser(tgtPath)
    unzPath = os.path.expanduser(unzPath)
    # verify that ZIP file exists
    if not os.path.exists(zipFile):
        print ("ZIP file '%s' does NOT exist!" % zipFile)
        return
    rmvFldr = False
    # if no target folder specified,
    if tgtPath == "":
        # output an error message
        print ("Target folder was NOT specified!")
    else:
        print ("Target folder '%s'was specified." % tgtPath)
        # make sure the target folder exists,
        # creating it recursively if it does not
        if not os.path.exists(tgtPath):
            os.makedirs(tgtPath)
    # if UNZIP folder was NOT specified,
    # generate a temporary UNZIP folder
    # to be removed after processing
    if unzPath == "":
        unzPath = tempfile.mkdtemp()
        rmvFldr = True
        print ("Unzip folder '%s' was generated." % unzPath)
    else:
        print ("Unzip folder '%s' was specified." % unzPath)
        # make sure the UNZIP folder exists,
        # creating it recursively if it does not
        if not os.path.exists(unzPath):
            os.makedirs(unzPath)
    try:
        # unzip the ZIP file
        # to the UNZIP folder
        self.zip2dir(zipFile, unzPath)
        # find the CSV file in the specified UNZIP folder;
        # sorting makes the choice independent of directory order
        tmpFile = ""
        for aFile in sorted(os.listdir(unzPath)):
            if aFile.lower().endswith(".csv"):
                tmpFile = os.path.join(unzPath, aFile)
                break
        # verify that a CSV file was found
        if tmpFile.strip() == "":
            print ("Zip file '%s' did NOT contain a CSV file!" % zipFile)
            return
        print ("zipFile = %s" % zipFile)
        print ("srcFile = %s" % tmpFile)
        if csvFile == "":
            csvFile = os.path.join(tgtPath, os.path.basename(tmpFile).lower())
        if os.path.dirname(csvFile) == "":
            csvFile = os.path.join(tgtPath, csvFile)
        print ("csvFile = %s" % csvFile)
        # create the CSV file from the SRC file
        self.txt2csv(tmpFile, tgtPath, csvFile, maxRecords, flushCount, srcDelimiter, csvDelimiter)
    finally:
        # if UNZIP folder removal specified, remove the UNZIP folder
        # in question -- also on the early return and on errors
        if rmvFldr:
            shutil.rmtree(unzPath)
    return
# -------------------------------------------------------------------------
# define the ZIP file to a DBF file, then to a CSV file method
# -------------------------------------------------------------------------
def zip2dbf2csv(self, zipFile, tgtPath, csvFile, maxRecords, flushCount, unzPath, csvDelimiter=','):
    """Unzip an archive and convert the DBF file(s) inside it into CSV.

    The archive is extracted with self.zip2dir() and every '*.dbf' entry
    at the top level of the unzip folder is converted with self.dbf2csv().

    Parameters:
        zipFile      -- path of the ZIP archive ('~' is expanded)
        tgtPath      -- folder for the output ('~' is expanded)
        csvFile      -- CSV file to write; when empty, each DBF file gets its
                        own CSV named after its lower-cased base name. When
                        given, every DBF file is written to that one file,
                        so the last one converted wins.
        maxRecords   -- passed through to dbf2csv()
        flushCount   -- passed through to dbf2csv()
        unzPath      -- folder to unzip into; when empty, a temporary folder
                        is generated and removed again afterwards
        csvDelimiter -- field delimiter of the CSV output

    Returns None. Prints a message and returns early when the archive
    does not exist; prints a message when it contains no DBF file.
    """
    # expand any leading tilde
    # to the user's home path
    zipFile = os.path.expanduser(zipFile)
    tgtPath = os.path.expanduser(tgtPath)
    unzPath = os.path.expanduser(unzPath)
    # verify that ZIP file exists
    if not os.path.exists(zipFile):
        print ("ZIP file '%s' does NOT exist!" % zipFile)
        return
    rmvFldr = False
    # if no target folder specified,
    if tgtPath == "":
        # output an error message
        print ("Target folder was NOT specified!")
    else:
        print ("Target folder '%s'was specified." % tgtPath)
        # make sure the target folder exists,
        # creating it recursively if it does not
        if not os.path.exists(tgtPath):
            os.makedirs(tgtPath)
    # if UNZIP folder was NOT specified,
    # generate a temporary UNZIP folder
    # to be removed after processing
    if unzPath == "":
        unzPath = tempfile.mkdtemp()
        rmvFldr = True
        print ("Unzip folder '%s' was generated." % unzPath)
    else:
        print ("Unzip folder '%s' was specified." % unzPath)
        # make sure the UNZIP folder exists,
        # creating it recursively if it does not
        if not os.path.exists(unzPath):
            os.makedirs(unzPath)
    try:
        # unzip the ZIP file
        # to the UNZIP folder
        self.zip2dir(zipFile, unzPath)
        # find the DBF file(s) in the specified UNZIP folder;
        # sorting makes the processing order independent of directory order
        tmpFile = ""
        for aFile in sorted(os.listdir(unzPath)):
            if not aFile.lower().endswith(".dbf"):
                continue
            tmpFile = os.path.join(unzPath, aFile)
            print ("zipFile = %s" % zipFile)
            print ("dbfFile = %s" % tmpFile)
            # derive the output name per DBF file, so that several DBF
            # files no longer overwrite the CSV derived from the first
            if csvFile == "":
                outFile = os.path.join(tgtPath, os.path.splitext(aFile.lower())[0] + '.csv')
            elif os.path.dirname(csvFile) == "":
                outFile = os.path.join(tgtPath, csvFile)
            else:
                outFile = csvFile
            print ("csvFile = %s" % outFile)
            # create the CSV file from the DBF file
            self.dbf2csv(tmpFile, tgtPath, outFile, maxRecords, flushCount, csvDelimiter)
        # verify that a DBF file was found
        if tmpFile.strip() == "":
            print ("Zip file '%s' did NOT contain a DBF file!" % zipFile)
    finally:
        # if UNZIP folder removal specified, remove the
        # UNZIP folder in question -- also on errors
        if rmvFldr:
            shutil.rmtree(unzPath)
    return
# -------------------------------------------------------------------------
# define the ZIP file to a MDB file, then to a CSV file method
# -------------------------------------------------------------------------
def zip2mdb2csv(self, zipFile, tgtPath, csvFile, tblName, maxRecords, flushCount, unzPath, csvDelimiter=','):
    """Unzip an archive and export a table of the Access database(s) inside it to CSV.

    The archive is extracted with self.zip2dir() and every '*.mdb' or
    '*.accdb' entry at the top level of the unzip folder is exported with
    self.mdb2csv().

    Parameters:
        zipFile      -- path of the ZIP archive ('~' is expanded)
        tgtPath      -- folder for the output ('~' is expanded)
        csvFile      -- CSV file to write; when empty, each database gets its
                        own CSV named after its lower-cased base name. When
                        given, every database is written to that one file,
                        so the last one exported wins.
        tblName      -- name of the table to export
        maxRecords   -- passed through to mdb2csv()
        flushCount   -- passed through to mdb2csv()
        unzPath      -- folder to unzip into; when empty, a temporary folder
                        is generated and removed again afterwards
        csvDelimiter -- field delimiter of the CSV output

    Returns None. Prints a message and returns early when the archive
    does not exist; prints a message when it contains no database file.
    """
    # expand any leading tilde
    # to the user's home path
    zipFile = os.path.expanduser(zipFile)
    tgtPath = os.path.expanduser(tgtPath)
    unzPath = os.path.expanduser(unzPath)
    # verify that ZIP file exists
    if not os.path.exists(zipFile):
        print ("ZIP file '%s' does NOT exist!" % zipFile)
        return
    rmvFldr = False
    # if no target folder specified,
    if tgtPath == "":
        # output an error message
        print ("Target folder was NOT specified!")
    else:
        print ("Target folder '%s'was specified." % tgtPath)
        # make sure the target folder exists,
        # creating it recursively if it does not
        if not os.path.exists(tgtPath):
            os.makedirs(tgtPath)
    # if UNZIP folder was NOT specified,
    # generate a temporary UNZIP folder
    # to be removed after processing
    if unzPath == "":
        unzPath = tempfile.mkdtemp()
        rmvFldr = True
        print ("Unzip folder '%s' was generated." % unzPath)
    else:
        print ("Unzip folder '%s' was specified." % unzPath)
        # make sure the UNZIP folder exists,
        # creating it recursively if it does not
        if not os.path.exists(unzPath):
            os.makedirs(unzPath)
    try:
        # unzip the ZIP file
        # to the UNZIP folder
        self.zip2dir(zipFile, unzPath)
        # find the MDB or ACCDB file(s) in the specified UNZIP folder;
        # sorting makes the processing order independent of directory order
        tmpFile = ""
        for aFile in sorted(os.listdir(unzPath)):
            if not aFile.lower().endswith((".mdb", ".accdb")):
                continue
            tmpFile = os.path.join(unzPath, aFile)
            print ("zipFile = %s" % zipFile)
            print ("mdbFile = %s" % tmpFile)
            # derive the output name per database, so that several
            # databases no longer overwrite the CSV derived from the first
            if csvFile == "":
                outFile = os.path.join(tgtPath, os.path.splitext(aFile.lower())[0] + '.csv')
            elif os.path.dirname(csvFile) == "":
                outFile = os.path.join(tgtPath, csvFile)
            else:
                outFile = csvFile
            print ("csvFile = %s" % outFile)
            # create the CSV file from the MDB file
            self.mdb2csv(tmpFile, tgtPath, outFile, tblName, maxRecords, flushCount, csvDelimiter)
        # verify that a MDB or ACCDB file was found
        if tmpFile.strip() == "":
            print ("Zip file '%s' did NOT contain a MDB or ACCDB file!" % zipFile)
    finally:
        # if UNZIP folder removal specified, remove the
        # UNZIP folder in question -- also on errors
        if rmvFldr:
            shutil.rmtree(unzPath)
    return
# -------------------------------------------------------------------------
# define the ZIP file to a SHP file, then to CSV files method
# -------------------------------------------------------------------------
def zip2shp2csv(self, zipFile, tgtPath, csvFile, maxRecords, flushCount, unzPath):
    """Unzip an archive and convert the shapefile inside it into a CSV file.

    The archive is extracted with self.zip2dir() and the alphabetically
    first '*.shp' entry at the top level of the unzip folder is converted
    with self.shp2csv().

    Parameters:
        zipFile    -- path of the ZIP archive ('~' is expanded)
        tgtPath    -- folder for the output ('~' is expanded)
        csvFile    -- CSV file to write; when empty, the lower-cased shapefile
                      base name with a '.csv' extension is used
        maxRecords -- passed through to shp2csv()
        flushCount -- passed through to shp2csv()
        unzPath    -- folder to unzip into; when empty, a temporary folder
                      is generated and removed again afterwards

    Returns None. Prints a message and returns early when the archive
    does not exist or contains no SHP file.
    """
    # expand any leading tilde
    # to the user's home path
    zipFile = os.path.expanduser(zipFile)
    tgtPath = os.path.expanduser(tgtPath)
    unzPath = os.path.expanduser(unzPath)
    # verify that ZIP file exists
    if not os.path.exists(zipFile):
        print ("ZIP file '%s' does NOT exist!" % zipFile)
        return
    rmvFldr = False
    # if no target folder specified,
    if tgtPath == "":
        # output an error message
        print ("Target folder was NOT specified!")
    else:
        print ("Target folder '%s'was specified." % tgtPath)
        # make sure the target folder exists,
        # creating it recursively if it does not
        if not os.path.exists(tgtPath):
            os.makedirs(tgtPath)
    # if UNZIP folder was NOT specified,
    # generate a temporary UNZIP folder
    # to be removed after processing
    if unzPath == "":
        unzPath = tempfile.mkdtemp()
        rmvFldr = True
        print ("Unzip folder '%s' was generated." % unzPath)
    else:
        print ("Unzip folder '%s' was specified." % unzPath)
        # make sure the UNZIP folder exists,
        # creating it recursively if it does not
        if not os.path.exists(unzPath):
            os.makedirs(unzPath)
    try:
        # unzip the ZIP file
        # to the UNZIP folder
        self.zip2dir(zipFile, unzPath)
        # find the SHP file in the specified UNZIP folder;
        # sorting makes the choice independent of directory order
        tmpFile = ""
        for aFile in sorted(os.listdir(unzPath)):
            if aFile.lower().endswith(".shp"):
                tmpFile = os.path.join(unzPath, aFile)
                break
        # verify that a SHP file was found
        if tmpFile.strip() == "":
            print ("Zip file '%s' did NOT contain a SHP file!" % zipFile)
            return
        print ("zipFile = %s" % zipFile)
        print ("shpFile = %s" % tmpFile)
        if csvFile == "":
            csvFile = os.path.join(tgtPath, os.path.splitext(os.path.basename(tmpFile).lower())[0] + '.csv')
        if os.path.dirname(csvFile) == "":
            csvFile = os.path.join(tgtPath, csvFile)
        print ("csvFile = %s" % csvFile)
        # create the CSV file from the SHP file
        self.shp2csv(tmpFile, tgtPath, csvFile, maxRecords, flushCount)
    finally:
        # if UNZIP folder removal specified, remove the UNZIP folder
        # in question -- also on the early return and on errors
        if rmvFldr:
            shutil.rmtree(unzPath)
    return
# -------------------------------------------------------------------------
# define the ZIP file to a SHP file, then to KML and CSV files method
# -------------------------------------------------------------------------
def zip2shp2kmlcsv(self, zipFile, tgtPath, kmlFile, csvFile, maxRecords, flushCount, unzPath):
    """Unzip an archive and convert the shapefile inside it into KML and CSV files.

    The archive is extracted with self.zip2dir() and the alphabetically
    first '*.shp' entry at the top level of the unzip folder is converted
    with self.shp2kmlcsv().

    Parameters:
        zipFile    -- path of the ZIP archive ('~' is expanded)
        tgtPath    -- folder for the output ('~' is expanded)
        kmlFile    -- KML file to write; when empty, the lower-cased shapefile
                      base name with a '.kml' extension is used
        csvFile    -- CSV file to write; when empty, the lower-cased shapefile
                      base name with a '.csv' extension is used
        maxRecords -- passed through to shp2kmlcsv()
        flushCount -- passed through to shp2kmlcsv()
        unzPath    -- folder to unzip into; when empty, a temporary folder
                      is generated and removed again afterwards

    Returns None. Prints a message and returns early when the archive
    does not exist or contains no SHP file.
    """
    # expand any leading tilde
    # to the user's home path
    zipFile = os.path.expanduser(zipFile)
    tgtPath = os.path.expanduser(tgtPath)
    unzPath = os.path.expanduser(unzPath)
    # verify that ZIP file exists
    if not os.path.exists(zipFile):
        print ("ZIP file '%s' does NOT exist!" % zipFile)
        return
    rmvFldr = False
    # if no target folder specified,
    if tgtPath == "":
        # output an error message
        print ("Target folder was NOT specified!")
    else:
        print ("Target folder '%s'was specified." % tgtPath)
        # make sure the target folder exists,
        # creating it recursively if it does not
        if not os.path.exists(tgtPath):
            os.makedirs(tgtPath)
    # if UNZIP folder was NOT specified,
    # generate a temporary UNZIP folder
    # to be removed after processing
    if unzPath == "":
        unzPath = tempfile.mkdtemp()
        rmvFldr = True
        print ("Unzip folder '%s' was generated." % unzPath)
    else:
        print ("Unzip folder '%s' was specified." % unzPath)
        # make sure the UNZIP folder exists, creating it recursively
        # if it does NOT (the test used to be inverted, which raised
        # an error whenever the folder was already there)
        if not os.path.exists(unzPath):
            os.makedirs(unzPath)
    try:
        # unzip the ZIP file
        # to the UNZIP folder
        self.zip2dir(zipFile, unzPath)
        # find the SHP file in the specified UNZIP folder;
        # sorting makes the choice independent of directory order
        tmpFile = ""
        for aFile in sorted(os.listdir(unzPath)):
            if aFile.lower().endswith(".shp"):
                tmpFile = os.path.join(unzPath, aFile)
                break
        # verify that a SHP file was found
        if tmpFile.strip() == "":
            print ("Zip file '%s' did NOT contain a SHP file!" % zipFile)
            return
        print ("zipFile = %s" % zipFile)
        print ("shpFile = %s" % tmpFile)
        baseName = os.path.splitext(os.path.basename(tmpFile).lower())[0]
        if kmlFile == "":
            kmlFile = os.path.join(tgtPath, baseName + '.kml')
        if os.path.dirname(kmlFile) == "":
            kmlFile = os.path.join(tgtPath, kmlFile)
        print ("kmlFile = %s" % kmlFile)
        if csvFile == "":
            csvFile = os.path.join(tgtPath, baseName + '.csv')
        if os.path.dirname(csvFile) == "":
            csvFile = os.path.join(tgtPath, csvFile)
        print ("csvFile = %s" % csvFile)
        # create the KML and CSV files from the SHP file
        self.shp2kmlcsv(tmpFile, tgtPath, kmlFile, csvFile, maxRecords, flushCount)
    finally:
        # if UNZIP folder removal specified, remove the UNZIP folder
        # in question -- also on the early return and on errors
        if rmvFldr:
            shutil.rmtree(unzPath)
    return
# -------------------------------------------------------------------------
# define the ZIP file to a TAB file, then to a CSV file method
# -------------------------------------------------------------------------
def zip2tab2csv(self, zipFile, tgtPath, tabFile, csvFile, maxRecords, flushCount, unzPath, tabDelimiter='\t', csvDelimiter=','):
    """Unzip an archive and convert the TAB/TXT file(s) inside it into CSV.

    The archive is extracted with self.zip2dir() and every '*.tab' or
    '*.txt' entry at the top level of the unzip folder is converted with
    self.tab2csv().

    Parameters:
        zipFile      -- path of the ZIP archive ('~' is expanded)
        tgtPath      -- folder for the output ('~' is expanded)
        tabFile      -- not used by this method; kept so that existing
                        callers keep working
        csvFile      -- CSV file to write; when empty, each source file gets
                        its own CSV named after its lower-cased base name.
                        When given, every source file is written to that one
                        file, so the last one converted wins.
        maxRecords   -- passed through to tab2csv()
        flushCount   -- passed through to tab2csv()
        unzPath      -- folder to unzip into; when empty, a temporary folder
                        is generated and removed again afterwards
        tabDelimiter -- field delimiter of the source file(s)
        csvDelimiter -- field delimiter of the CSV output

    Returns None. Prints a message and returns early when the archive
    does not exist; prints a message when it contains no TAB or TXT file.
    """
    # expand any leading tilde
    # to the user's home path
    zipFile = os.path.expanduser(zipFile)
    tgtPath = os.path.expanduser(tgtPath)
    unzPath = os.path.expanduser(unzPath)
    # verify that ZIP file exists
    if not os.path.exists(zipFile):
        print ("ZIP file '%s' does NOT exist!" % zipFile)
        return
    rmvFldr = False
    # if no target folder specified,
    if tgtPath == "":
        # output an error message
        print ("Target folder was NOT specified!")
    else:
        print ("Target folder '%s'was specified." % tgtPath)
        # make sure the target folder exists,
        # creating it recursively if it does not
        if not os.path.exists(tgtPath):
            os.makedirs(tgtPath)
    # if UNZIP folder was NOT specified,
    # generate a temporary UNZIP folder
    # to be removed after processing
    if unzPath == "":
        unzPath = tempfile.mkdtemp()
        rmvFldr = True
        print ("Unzip folder '%s' was generated." % unzPath)
    else:
        print ("Unzip folder '%s' was specified." % unzPath)
        # make sure the UNZIP folder exists,
        # creating it recursively if it does not
        if not os.path.exists(unzPath):
            os.makedirs(unzPath)
    try:
        # unzip the ZIP file
        # to the UNZIP folder
        self.zip2dir(zipFile, unzPath)
        # find the TAB or TXT file(s) in the specified UNZIP folder;
        # sorting makes the processing order independent of directory order
        tmpFile = ""
        for aFile in sorted(os.listdir(unzPath)):
            if not aFile.lower().endswith((".tab", ".txt")):
                continue
            tmpFile = os.path.join(unzPath, aFile)
            print ("zipFile = %s" % zipFile)
            print ("tabFile = %s" % tmpFile)
            # derive the output name per source file, so that several
            # files no longer overwrite the CSV derived from the first
            if csvFile == "":
                outFile = os.path.join(tgtPath, os.path.splitext(aFile.lower())[0] + '.csv')
            elif os.path.dirname(csvFile) == "":
                outFile = os.path.join(tgtPath, csvFile)
            else:
                outFile = csvFile
            print ("csvFile = %s" % outFile)
            # create the CSV file from the TAB file
            self.tab2csv(tmpFile, tgtPath, outFile, maxRecords, flushCount, tabDelimiter, csvDelimiter)
        # verify that a TAB or TXT file was found
        if tmpFile.strip() == "":
            print ("Zip file '%s' did NOT contain a TAB or TXT file!" % zipFile)
    finally:
        # if UNZIP folder removal specified, remove the
        # UNZIP folder in question -- also on errors
        if rmvFldr:
            shutil.rmtree(unzPath)
    return
# -------------------------------------------------------------------------
# define the ZIP file to a TXT file and then to a CSV file method
# -------------------------------------------------------------------------
def zip2txt2csv(self, zipFile, tgtPath, csvFile, maxRecords, flushCount, unzPath, txtDelimiter=',', csvDelimiter=','):
    '''
    Unzip a ZIP file, pick the first TXT file found in the unzip folder,
    and hand it to self.txt2csv() to be written out as a CSV file.

    zipFile      -- path of the ZIP file (a leading "~" is expanded)
    tgtPath      -- target folder for the CSV file; created if missing
    csvFile      -- CSV file name or path; when "" it is derived from the
                    lower-cased TXT file name, and when it carries no
                    folder component it is placed inside tgtPath
    maxRecords   -- passed through unchanged to self.txt2csv()
    flushCount   -- passed through unchanged to self.txt2csv()
    unzPath      -- folder to unzip into; when "" a temporary folder is
                    generated and removed again before returning
    txtDelimiter -- passed through unchanged to self.txt2csv()
    csvDelimiter -- passed through unchanged to self.txt2csv()

    Always returns None; problems are reported with print().
    '''
    # expand any leading tilde
    # to the user's home path
    zipFile = os.path.expanduser(zipFile)
    tgtPath = os.path.expanduser(tgtPath)
    unzPath = os.path.expanduser(unzPath)
    # verify that ZIP file exists
    if not os.path.exists(zipFile):
        print ("ZIP file '%s' does NOT exist!" % zipFile)
        return
    rmvFldr = False
    # if no target folder specified,
    if tgtPath == "":
        # output an error message (processing deliberately
        # continues, exactly as it always has)
        print ("Target folder was NOT specified!")
    else:
        print ("Target folder '%s'was specified." % tgtPath)
        # make sure the target folder exists,
        # creating it recursively if it does not
        if not os.path.exists(tgtPath):
            os.makedirs(tgtPath)
    # if UNZIP folder was NOT specified,
    # generate a temporary UNZIP folder
    # to be removed after processing
    if unzPath == "":
        unzPath = tempfile.mkdtemp()
        rmvFldr = True
        print ("Unzip folder '%s' was generated." % unzPath)
    else:
        print ("Unzip folder '%s' was specified." % unzPath)
        # make sure the UNZIP folder exists,
        # creating it recursively if it does not
        if not os.path.exists(unzPath):
            os.makedirs(unzPath)
    # everything that touches the UNZIP folder runs inside try/finally
    # so that a generated temporary folder is removed on EVERY exit
    # path: success, "no TXT file found", or an exception
    try:
        # unzip the ZIP file
        # to the UNZIP folder
        self.zip2dir(zipFile, unzPath)
        # find the first TXT file in
        # the specified UNZIP folder
        tmpFile = ""
        for aFile in os.listdir(unzPath):
            if aFile.lower().endswith(".txt"):
                tmpFile = os.path.join(unzPath, aFile)
                break
        # verify that a TXT file was found
        if tmpFile == "":
            print ("Zip file '%s' did NOT contain a TXT file!" % zipFile)
            return
        print ("zipFile = %s" % zipFile)
        print ("txtFile = %s" % tmpFile)
        if csvFile == "":
            # swap only the trailing extension; a plain str.replace()
            # would also rewrite any ".txt" inside the base name
            baseName = os.path.splitext(os.path.basename(tmpFile).lower())[0]
            csvFile = os.path.join(tgtPath, baseName + '.csv')
        if os.path.dirname(csvFile) == "":
            csvFile = os.path.join(tgtPath, csvFile)
        print ("csvFile = %s" % csvFile)
        # create the CSV file from the TXT file
        self.txt2csv(tmpFile, tgtPath, csvFile, maxRecords, flushCount, txtDelimiter, csvDelimiter)
    finally:
        # if UNZIP folder removal specified,
        # remove the UNZIP folder in question
        if rmvFldr:
            shutil.rmtree(unzPath)
    return
# -------------------------------------------------------------------------
# define the ZIP file to an XLS file, then to a CSV file method
# -------------------------------------------------------------------------
def zip2xls2csv(self, zipFile, tgtPath, csvFile, maxRecords, flushCount, unzPath):
    '''
    Unzip a ZIP file and hand every XLS/XLSX file found in the unzip
    folder to self.xls2csv() for conversion to CSV.

    zipFile    -- path of the ZIP file (a leading "~" is expanded)
    tgtPath    -- target folder for the CSV output; created if missing
    csvFile    -- CSV file name or path; when "" a name is derived per
                  workbook from its lower-cased file name, and when it
                  carries no folder component it is placed inside tgtPath
    maxRecords -- passed through unchanged to self.xls2csv()
    flushCount -- passed through unchanged to self.xls2csv()
    unzPath    -- folder to unzip into; when "" a temporary folder is
                  generated and removed again before returning

    Always returns None; problems are reported with print().
    '''
    # expand any leading tilde
    # to the user's home path
    zipFile = os.path.expanduser(zipFile)
    tgtPath = os.path.expanduser(tgtPath)
    unzPath = os.path.expanduser(unzPath)
    # verify that ZIP file exists
    if not os.path.exists(zipFile):
        print ("ZIP file '%s' does NOT exist!" % zipFile)
        return
    rmvFldr = False
    # if no target folder specified,
    if tgtPath == "":
        # output an error message (processing deliberately
        # continues, exactly as it always has)
        print ("Target folder was NOT specified!")
    else:
        print ("Target folder '%s'was specified." % tgtPath)
        # make sure the target folder exists,
        # creating it recursively if it does not
        if not os.path.exists(tgtPath):
            os.makedirs(tgtPath)
    # if UNZIP folder was NOT specified,
    # generate a temporary UNZIP folder
    # to be removed after processing
    if unzPath == "":
        unzPath = tempfile.mkdtemp()
        rmvFldr = True
        print ("Unzip folder '%s' was generated." % unzPath)
    else:
        print ("Unzip folder '%s' was specified." % unzPath)
        # make sure the UNZIP folder exists,
        # creating it recursively if it does not
        if not os.path.exists(unzPath):
            os.makedirs(unzPath)
    # everything that touches the UNZIP folder runs inside try/finally
    # so that a generated temporary folder is removed even when the
    # unzip or the conversion raises
    try:
        # unzip the ZIP file
        # to the UNZIP folder
        self.zip2dir(zipFile, unzPath)
        # find the XLS or XLSX files in
        # the specified UNZIP folder
        xlsFound = False
        for aFile in os.listdir(unzPath):
            if not aFile.lower().endswith((".xls", ".xlsx")):
                continue
            xlsFound = True
            tmpFile = os.path.join(unzPath, aFile)
            print ("zipFile = %s" % zipFile)
            print ("xlsFile = %s" % tmpFile)
            # work on a per-workbook copy so that a name derived for
            # one workbook is not reused (and overwritten) by the next
            outFile = csvFile
            if outFile == "":
                # swap only the trailing extension; chained replace()
                # calls turned "name.xlsx" into "name.csvx"
                baseName = os.path.splitext(aFile.lower())[0]
                outFile = os.path.join(tgtPath, baseName + '.csv')
            if os.path.dirname(outFile) == "":
                outFile = os.path.join(tgtPath, outFile)
            print ("csvFile = %s" % outFile)
            # create the CSV files from the XLS file
            self.xls2csv(tmpFile, tgtPath, outFile, maxRecords, flushCount)
        # verify that an XLS or XLSX file was found
        if not xlsFound:
            print ("Zip file '%s' did NOT contain a XLS file!" % zipFile)
    finally:
        # if UNZIP folder removal specified,
        # remove the UNZIP folder in question
        if rmvFldr:
            shutil.rmtree(unzPath)
    return
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment