KhepryQuixote · May 4, 2021 14:12 · KhepryQuixote · Sep 30, 2014 · nate12o6 · Sep 18, 2015
diff --git a/PyFrackETL.py b/PyFrackETL.py
 # -*- coding: utf-8 -*-

 '''
 This Python script downloads, extracts, and outputs oil, gas, and
 fracking well information at the state level within the United States
 of America.  Not every state is present: some states do not allow
 fracking, and others have no sites available for download.

 This Python script is dependent upon another Python script entitled
 Src2Tgt.py, which is also available as a Gist under the same name.
 '''

 import ftplib
 import os
 import socket
 import urllib
 import requests

 from Src2Tgt import *

 from urllib.request import urlretrieve

 # global settings, read by main() and handed on to process_files()

 # stage switches: convert what is already on disk and/or fetch it first
 unpackFiles = True
 downloadFiles = False

 # record limits, passed unchanged to the Src2Tgt converters
 # NOTE(review): 0 presumably means "no limit" -- confirm in Src2Tgt
 flushCount = 1000
 maxRecords = 0

 # =============================================================================    
 # Main routine:
 # =============================================================================    


 def main():
    '''
    Run process_files() for every requested state.

    The download/unpack switches and the record limits come from the
    module-level settings (downloadFiles, unpackFiles, maxRecords and
    flushCount).  Requested codes that are missing from the table of
    supported states are reported and skipped.  Returns None.
    '''

    # States to process on this run.
    #
    # 'OH' and 'PA' are left out because they are still thorns in my side:
    #
    # NOTE: 'OH' fails because the FTP site
    # is either not active or is not allowing connections
    #
    # NOTE: 'PA' fails because of a 302 redirect.
    # This was somewhat solved using "requests" instead of "urllib",
    # however, now the website wants the program to "agree" by pressing
    # a button on the redirect screen:
    # <input type="submit" name="ctl00$MainContent$AgreeButton" value="Agree" id="MainContent_AgreeButton" />
    # TODO: programmatically "submit" that "agree" form
    states = ['NY']

    mainPath = "/home/temp/fracking/data"

    downloadPath = os.path.join(mainPath, "download")
    unpackedPath = os.path.join(mainPath, "unpacked")

    # every state code that process_files() has a handler for
    listStates = {'AK':'Alaska',
                  'AL':'Alabama',
                  'AR':'Arkansas',
                  'CA':'California',
                  'CO':'Colorado',
                  'FL':'Florida',
                  'ID':'Idaho',
                  'KS':'Kansas',
                  'LA':'Louisiana',
                  'MI':'Michigan',
                  'MO':'Missouri',
                  'MT':'Montana',
                  'ND':'North Dakota',
                  'NE':'Nebraska',
                  'NY':'New York',
                  'OH':'Ohio',
                  'OK':'Oklahoma',
                  'OR':'Oregon',
                  'PA':'Pennsylvania',
                  'UT':'Utah',
                  'VA':'Virginia',
                  'WA':'Washington',
                  'WV':'West Virginia'}

    for state in states:
        if state not in listStates:
            # make a mistyped code visible instead of silently doing nothing
            print ('WARNING: "%s" is not a supported state code, skipped' % state)
            continue
        print ('--------------------')
        print ('%s (%s) process' % (listStates[state], state))
        process_files(downloadPath, unpackedPath, state, downloadFiles, unpackFiles, maxRecords, flushCount)


 # =============================================================================    
 # Well information downloads and processing, state-by-state:
 # =============================================================================    

 def process_files(dwnPath, unpPath, state, downloadFiles, unpackFiles, maxRecords, flushCount):
    '''
    Download and/or convert the published well data files of one state.

    Every source file goes through two independent stages: it is
    downloaded into <dwnPath>/<state> (only when downloadFiles is true)
    and then converted to CSV, or copied unchanged, into
    <unpPath>/<state> (only when unpackFiles is true).  Both directories
    are created on demand.  A state code without a handler below creates
    the two directories and does nothing else.

    Parameters:
        dwnPath       -- root download directory
        unpPath       -- root output directory
        state         -- two-letter state code, e.g. 'NY'
        downloadFiles -- when true, run the download stage
        unpackFiles   -- when true, run the conversion stage
        maxRecords    -- passed unchanged to the Src2Tgt converters
        flushCount    -- passed unchanged to the Src2Tgt converters

    Returns None.  Exceptions raised by the converters are not caught.
    '''

    # imported here so that the plain-copy steps do not depend on
    # "from Src2Tgt import *" happening to re-export shutil
    import shutil

    unzPath = ""

    dwnPath = os.path.join(dwnPath, state)
    unpPath = os.path.join(unpPath, state)

    # exist_ok avoids the check-then-create race of os.path.exists()
    os.makedirs(dwnPath, exist_ok=True)
    os.makedirs(unpPath, exist_ok=True)

    src2tgt = Src2Tgt()

    # -------------------------------------------------------------------------
    # Stage helpers (each one is a no-op when its stage is switched off)
    # -------------------------------------------------------------------------

    def http_get(url, tgtFile):
        # download stage: fetch a http://, https:// or ftp:// URL via urllib
        if downloadFiles:
            urllib_download(url, dwnPath, tgtFile)

    def ftp_get(host, userName, passWord, ftpPath, tgtFile):
        # download stage: fetch over an FTP session; the remote file name
        # and the local file name are the same for every FTP source
        if downloadFiles:
            ftp_download(host, userName, passWord, ftpPath, tgtFile, dwnPath, tgtFile)

    def csv_name(tgtFile, ext='.zip'):
        # lower-cased download name with its extension swapped for .csv
        return tgtFile.lower().replace(ext, '.csv')

    def convert(method, tgtFile, csvFile, *extra):
        # conversion stage: calls
        #   src2tgt.<method>(source, unpPath, csvFile, maxRecords, flushCount, *extra)
        # The method is looked up by name only when the stage runs, so a
        # converter that does not exist yet cannot break a download-only run.
        if unpackFiles:
            srcFile = os.path.join(dwnPath, tgtFile)
            getattr(src2tgt, method)(srcFile, unpPath, csvFile, maxRecords, flushCount, *extra)

    def unzip(method, tgtFile, csvFile=None, *delimiters):
        # conversion stage for zip archives.  csvFile defaults to the name
        # derived from tgtFile; an empty string forces the CSV file to the
        # same name as each unzipped file.
        if csvFile is None:
            csvFile = csv_name(tgtFile)
        convert(method, tgtFile, csvFile, unzPath, *delimiters)

    def unzip_mdb(tgtFile, table):
        # conversion stage for zipped Access databases: export one table
        if unpackFiles:
            zipFile = os.path.join(dwnPath, tgtFile)
            src2tgt.zip2mdb2csv(zipFile, unpPath, csv_name(tgtFile), table, maxRecords, flushCount, unzPath)

    def keep_as_is(tgtFile):
        # conversion stage for reference documents: plain copy to the output
        if unpackFiles:
            shutil.copy2(os.path.join(dwnPath, tgtFile), os.path.join(unpPath, tgtFile))

    # -------------------------------------------------------------------------
    # Alaska
    # -------------------------------------------------------------------------
    if state == 'AK':

        http_get("http://aogweb.state.ak.us/Data_Extract/AOGCC_DataExtract.zip", "AOGCC_DataExtract.zip")
        unzip_mdb("AOGCC_DataExtract.zip", 'tblWellMaster')

    # -------------------------------------------------------------------------
    # Alabama
    # -------------------------------------------------------------------------
    elif state == 'AL':

        base = "http://www.ogb.state.al.us/ogb/ogb_gisdata/"
        for name in ("wells.zip", "bhl_loc.zip", "bhl_lines.zip"):
            http_get(base + name, name)
            unzip('zip2shp2csv', name)

    # -------------------------------------------------------------------------
    # Arkansas
    # -------------------------------------------------------------------------
    elif state == 'AR':

        # TODO: need to process these as kml2csv (download only for now)
        base = "http://aogc2.state.ar.us/GIS_GOOGLE/"
        for name in ("Natural_Gas_and_Oil_Wells.zip", "Drilling_Fluid_Disposal_Sites.zip"):
            http_get(base + name, name)

    # -------------------------------------------------------------------------
    # California
    # -------------------------------------------------------------------------
    elif state == 'CA':

        # both archives are published as AllWells.zip,
        # hence the distinct local names
        http_get("ftp://ftp.consrv.ca.gov/pub/oil/GIS/ExcelTables/AllWells.zip", "AllWells_Excel.zip")
        unzip('zip2xls2csv', "AllWells_Excel.zip", "AllWells_Excel.csv")

        http_get("ftp://ftp.consrv.ca.gov/pub/oil/GIS/Shapefiles/AllWells.zip", "AllWells_Shape.zip")
        unzip('zip2shp2csv', "AllWells_Shape.zip", "AllWells_Shape.csv")

    # -------------------------------------------------------------------------
    # Colorado
    # -------------------------------------------------------------------------
    elif state == 'CO':

        base = "http://cogcc.state.co.us/Downloads/"
        for name in ("WELL_SHP.ZIP", "DIR_BHLS.ZIP", "DIRLINES.ZIP"):
            http_get(base + name, name)
            unzip('zip2shp2csv', name)

    # -------------------------------------------------------------------------
    # Florida
    # -------------------------------------------------------------------------
    elif state == 'FL':

        base = "http://www.dep.state.fl.us/water/mines/oil_gas/docs/"
        for name, ext in (("og-permit-database.xlsx", '.xlsx'),
                          ("pre_permit_oil_gas_wells.xls", '.xls')):
            http_get(base + name, name)
            convert('xls2csv', name, csv_name(name, ext))

    # -------------------------------------------------------------------------
    # Idaho
    # -------------------------------------------------------------------------
    elif state == 'ID':

        # TODO: will need to adjust the lat/longs in that they're in
        # the Idaho Transverse Mercator (IDTM) projection.
        base = "http://www.idwr.idaho.gov/ftp/gisdata/Spatial/Wells/"

        http_get(base + "WellConstruction/wells.zip", "AllPermittedWells.zip")
        unzip('zip2shp2csv', "AllPermittedWells.zip")

        http_get(base + "UndergroundInjectionControl/InjectionWells.zip", "InjectionWells.zip")
        unzip('zip2shp2csv', "InjectionWells.zip")

    # -------------------------------------------------------------------------
    # Kansas
    # -------------------------------------------------------------------------
    elif state == 'KS':

        base = "http://www.kgs.ku.edu/PRS/Ora_Archive/"

        http_get(base + "ks_wells.zip", "ks_wells.zip")
        unzip('zip2txt2csv', "ks_wells.zip", "")

        http_get(base + "OILGAS_WELLS_GEO27.zip", "OILGAS_WELLS_GEO27.zip")
        unzip('zip2shp2csv', "OILGAS_WELLS_GEO27.zip")

    # -------------------------------------------------------------------------
    # Louisiana
    # -------------------------------------------------------------------------
    elif state == 'LA':

        base = "http://sonris-www.dnr.state.la.us/gis/agsweb/arcgisserver/arcgisoutput/extData/shp/"
        for name in ("wells.zip", "BOTTOM_HOLE.zip", "BOTTOM_HOLE_LINE.zip", "WATER_WELL_REG.zip"):
            http_get(base + name, name)
            unzip('zip2shp2csv', name)

    # -------------------------------------------------------------------------
    # Michigan
    # -------------------------------------------------------------------------
    elif state == 'MI':

        ftp_get("ftp.deq.state.mi.us", "GeoWebFace", "Geology(1)",
                "geowebface/DownLoads", "ogs-oilandgas-well-locations.zip")
        unzip_mdb("ogs-oilandgas-well-locations.zip", 'well locations')

        http_get("http://www.michigan.gov/documents/deq/ogs-oilandgas-injection-data_278433_7.zip",
                 "ogs-oilandgas-injection-data_278433_7.zip")
        unzip_mdb("ogs-oilandgas-injection-data_278433_7.zip", 'Injection Wells Data')

    # -------------------------------------------------------------------------
    # Missouri
    # -------------------------------------------------------------------------
    elif state == 'MO':

        # the remote name ("permits6-...") differs from the local name ("permits-6-...")
        http_get("http://www.dnr.mo.gov/geology/geosrv/docs/permits6-5-2014.xls", "permits-6-5-2014.xls")
        convert('xls2csv', "permits-6-5-2014.xls", csv_name("permits-6-5-2014.xls", '.xls'))

    # -------------------------------------------------------------------------
    # Montana
    # -------------------------------------------------------------------------
    elif state == 'MT':

        # TODO: will have to figure out how to obtain these automatically!
        # (download only for now)
        base = "http://www.bogc.dnrc.mt.gov/WebApps/DataMiner/Wells/"
        for name in ("WellSurfaceLongLat.aspx", "WellBLSLongLat.aspx"):
            http_get(base + name, name)

    # -------------------------------------------------------------------------
    # North Dakota
    # -------------------------------------------------------------------------
    elif state == 'ND':

        http_get("https://www.dmr.nd.gov/output/ShapeFiles/Wells.zip", "Wells.zip")
        unzip('zip2shp2csv', "Wells.zip")

    # -------------------------------------------------------------------------
    # Nebraska
    # -------------------------------------------------------------------------
    elif state == 'NE':

        base = "http://www.nogcc.ne.gov/Publications/"

        http_get(base + "NebraskaWellData.zip", "NebraskaWellData.zip")
        unzip_mdb("NebraskaWellData.zip", 'tblNebraskaWellData')

        http_get(base + "NE_WELLS.zip", "NE_WELLS.zip")
        # TODO: might be nice to tweak zip2shp2csv to compress multiple spaces down to one space
        unzip('zip2shp2csv', "NE_WELLS.zip")

        http_get(base + "NebraskaWellStatus_TypeCodes.pdf", "NebraskaWellStatus_TypeCodes.pdf")
        keep_as_is("NebraskaWellStatus_TypeCodes.pdf")

    # -------------------------------------------------------------------------
    # New York
    # -------------------------------------------------------------------------
    elif state == 'NY':

        http_get("ftp://ftp.dec.state.ny.us/dmn/zip/wellDOS.zip", "wellDOS.zip")
        unzip('zip2csv2csv', "wellDOS.zip", "")

    # -------------------------------------------------------------------------
    # Ohio
    # -------------------------------------------------------------------------
    elif state == 'OH':

        # TODO: This FTP site appears to be no longer available
        # (anonymous login, download only)
        for name in ("Setup.exe", "Rdbmsd97.exe"):
            ftp_get("ftp.dnr.state.oh.us", "", "", "OilGas/Upload/RBDMS", name)

    # -------------------------------------------------------------------------
    # Oklahoma
    # -------------------------------------------------------------------------
    elif state == 'OK':

        base = "ftp://ftp.occ.state.ok.us/OG_DATA/"

        for name in ("W27BASE.ZIP", "W97OPER.ZIP"):
            http_get(base + name, name)
            unzip('zip2txt2csv', name, "", '|', ',')

        http_get(base + "CONTENTS.XLS", "CONTENTS.XLS")
        keep_as_is("CONTENTS.XLS")

    # -------------------------------------------------------------------------
    # Oregon
    # -------------------------------------------------------------------------
    elif state == 'OR':

        http_get("http://www.oregongeology.org/mlrr/spreadsheets/OG_Permits_06-25-2013.xlsx",
                 "OG_Permits_06-25-2013.xlsx")
        convert('xls2csv', "OG_Permits_06-25-2013.xlsx", csv_name("OG_Permits_06-25-2013.xlsx", '.xlsx'))

    # -------------------------------------------------------------------------
    # Pennsylvania
    # -------------------------------------------------------------------------
    # TODO: This may have to be done manually until
    # I can figure out how to "submit"
    # an "agree" button to a redirected form
    elif state == 'PA':

        base = "https://www.paoilandgasreporting.state.pa.us/publicreports/Modules/DataExports/ExportProductionData.aspx?PERIOD_ID="
        for period, name in (("2013-0", "Conventional_Wells_2013_01-12.csv"),
                             ("2013-1", "Unconventional_Wells_2013_01-06.csv"),
                             ("2013-2", "Unconventional_Wells_2013_07-12.csv")):
            # these exports are fetched with "requests" rather than urllib
            if downloadFiles:
                requests_download(base + period, dwnPath, name)

    # -------------------------------------------------------------------------
    # Utah
    # -------------------------------------------------------------------------
    elif state == 'UT':

        # TODO: will have to come up with an exe2dbf2csv routine
        # in order to process this state's data file(s)
        http_get("https://oilgas.ogm.utah.gov/pub/Database/welldata.exe", "welldata.exe")

    # -------------------------------------------------------------------------
    # Virginia
    # -------------------------------------------------------------------------
    elif state == 'VA':

        http_get("http://www.dmme.virginia.gov/dgoinquiry/frmPrint.aspx?Form=DgoWellLocation",
                 "DgoWellLocation.pdf")
        # TODO: will need to write a pdf2csv routine
        convert('pdf2csv', "DgoWellLocation.pdf", csv_name("DgoWellLocation.pdf", '.pdf'))

    # -------------------------------------------------------------------------
    # Washington
    # -------------------------------------------------------------------------
    elif state == 'WA':

        # TODO: this one appears to be a total ass
        # to work with, maybe ogr2ogr can handle it
        # TODO: come up with a gdb2csv routine
        http_get("http://www.dnr.wa.gov/Publications/ger_portal_oil_gas_wells.zip",
                 "ger_portal_oil_gas_wells.zip")

    # -------------------------------------------------------------------------
    # West Virginia
    # -------------------------------------------------------------------------
    elif state == 'WV':

        # TODO: first column is missing column name
        # visual inspection reveals that the values
        # are simply the row number and can be safely
        # overlooked for the time being
        base = "http://www.dep.wv.gov/oil-and-gas/databaseinfo/Documents/"
        for name in ("WellLocationPart1(10-31-2013).zip", "WellLocationPart2(10-31-2013).zip"):
            http_get(base + name, name)
            unzip('zip2xls2csv', name, "")
    
 # =============================================================================    
 # FTP File Processing:
 #   Pull a specified file (i.e. download the file)
 # =============================================================================    


 def ftp_download(url,
                 userName,
                 passWord,
                 ftpPath,
                 ftpFile,
                 tgtPath,
                 tgtFile):
    '''
    Download one file from an FTP server in binary mode.

    Parameters:
        url      -- FTP host name (no "ftp://" prefix)
        userName -- login name; an empty string means anonymous login
        passWord -- login password (ignored for anonymous login)
        ftpPath  -- remote directory to change into
        ftpFile  -- remote file name
        tgtPath  -- local directory to download into
        tgtFile  -- local file name

    Returns True when the file was downloaded, False when connecting,
    logging in, changing directory or retrieving the file failed; each
    failure is reported on stdout.  Only connection errors and permanent
    (5xx) FTP errors are handled, anything else propagates.
    '''

    print ("==========================")
    print ("Pulling File from FTP Site")
    print ("File: %s" % '/'.join([url, ftpPath, ftpFile]))

    ftp = ftplib.FTP()

    try:
        ftp.connect(url)
    except socket.error as e:
        print ('ERROR: unable to connect to "%s"' % url)
        print ('ERROR: FTP connection error "%s"' % e)
        # there is no session to close: calling quit() on an unconnected
        # FTP object raises AttributeError
        return False

    try:
        if userName != "":
            loginError = 'ERROR: cannot login (bad user name or password) to "%s"'
        else:
            loginError = 'ERROR: cannot login (anonymous login forbidden) to "%s"'

        try:
            if userName != "":
                ftp.login(userName, passWord)
            else:
                # login anonymously
                ftp.login()
        except ftplib.error_perm as e:
            print (loginError % url)
            print ('ERROR: FTP login error "%s"' % e)
            return False

        try:
            ftp.cwd(ftpPath)
        except ftplib.error_perm as e:
            print ('ERROR: cannot CWD to "%s"' % ftpPath)
            print ('ERROR: FTP CWD error "%s"' % e)
            return False

        # build the download target file path
        tgtFilePath = os.path.join(tgtPath, tgtFile)
        print ("Download file to: %s" % tgtFilePath)

        try:
            # the with-block closes the target file whatever happens
            with open(tgtFilePath, 'wb') as tgt:
                ftp.retrbinary("RETR %s" % ftpFile, tgt.write)
        except ftplib.error_perm as e:
            print ('ERROR: cannot read file "%s"' % ftpFile)
            print ('ERROR: FTP RETR (binary) error "%s"' % e)
            # do not leave an empty/partial file behind that a later
            # unpack run could mistake for a finished download
            if os.path.exists(tgtFilePath):
                os.remove(tgtFilePath)
            return False

        # success!
        print ('SUCCESS: downloaded "%s" to "%s"' % (ftpFile, tgtFilePath))
        return True

    finally:
        # always end the session; fall back to a hard close when the
        # polite QUIT itself fails (e.g. the server already hung up)
        try:
            ftp.quit()
        except ftplib.all_errors:
            ftp.close()


 # =============================================================================    
 # URL Download routine (urllib; used for http, https and ftp URLs):
 # =============================================================================    


 def urllib_download(url, tgtPath, tgtFile):
    '''
    Download a URL to <tgtPath>/<tgtFile> with urllib's urlretrieve.

    Parameters:
        url     -- source URL (the callers pass http, https and ftp URLs)
        tgtPath -- existing local directory to download into
        tgtFile -- local file name

    Returns None.  Nothing is downloaded when tgtPath does not exist.
    A failed download (urllib.error.URLError, which includes HTTPError)
    is reported on stdout instead of being swallowed or raised.
    '''

    tgtFullPath = os.path.abspath(os.path.join(tgtPath, tgtFile))

    if not os.path.exists(tgtPath):
        print ("urllib_download, tgtPath does not exist '%s'" % tgtPath)
        return

    print ("urllib_download, download start '%s'" % url)
    try:
        urlretrieve(url, tgtFullPath)
    except urllib.error.URLError as e:
        # URLError is the base class of HTTPError and is what ftp:// and
        # connection failures raise; say so rather than claim "ended"
        print ("urllib_download, download failed '%s' (%s)" % (url, e))
        return
    print ("urllib_download, download ended '%s'" % url)


 # =============================================================================    
 # HTTP Download routine (via the requests package):
 # =============================================================================    


 def requests_download(url, tgtPath, tgtFile):
    '''
    Download a URL to <tgtPath>/<tgtFile> with the "requests" package,
    streaming the body to disk in 8 KiB chunks.

    Parameters:
        url     -- source URL
        tgtPath -- existing local directory to download into
        tgtFile -- local file name

    Returns None.  Nothing is downloaded when tgtPath does not exist.
    Connection problems and HTTP error statuses (4xx/5xx) are reported on
    stdout and no target file is written for them.
    '''

    tgtFullPath = os.path.abspath(os.path.join(tgtPath, tgtFile))

    if not os.path.exists(tgtPath):
        print ("requests_download, tgtPath does not exist '%s'" % tgtPath)
        return

    print ("requests_download, download start '%s'" % url)
    try:
        # the with-block releases the connection even when streaming fails
        with requests.get(url, stream=True) as r:
            # do not save an error page as if it were the requested data
            r.raise_for_status()
            with open(tgtFullPath, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
    except requests.exceptions.RequestException as e:
        print ("requests_download, download failed '%s' (%s)" % (url, e))
        return
    print ("requests_download, download ended '%s'" % url)


 # =============================================================================    
 # Test routine:
 # =============================================================================        


 def test_downloads():
    '''
    Manual smoke test of both download routines (needs network access).

    Fetches the same file twice into /home/temp: first through an
    anonymous FTP session with ftp_download(), then through its ftp://
    URL with urllib_download().  Returns None.
    '''

    tgtPath = '/home/temp'

    ftpUrl = 'alt.ncsbe.gov'
    ftpUsr = 'anonymous'
    ftpPwd = ''
    ftpPath = 'data'
    ftpFile = 'ncvoter89.zip'
    tgtFile = 'ncvoter89.zip'

    # os.makedirs creates missing parents itself; this module neither
    # defines nor explicitly imports a mkdir_recursively helper
    os.makedirs(tgtPath, exist_ok=True)

    ftp_download(ftpUrl, ftpUsr, ftpPwd, ftpPath, ftpFile, tgtPath, tgtFile)

    # same file again, this time addressed by URL
    httpUrl = 'ftp://alt.ncsbe.gov/data/ncvoter89.zip'

    urllib_download(httpUrl, tgtPath, tgtFile)

    
 # ============================================================================
 # execute the mainline processing routine
 # ============================================================================


 if __name__ == "__main__":
    # run the mainline only when executed as a script; the result of
    # main() is kept in the module-level name `retval`
    retval = main()
    
diff --git a/Src2Tgt.py b/Src2Tgt.py
 # -*- coding: utf-8 -*-

 '''
 This Python script is oriented towards oil, gas, and fracking well
 information download, extract, and output at the state-level within
 the United States of America.  Not every state is covered, because some
 states either do not allow fracking or offer no sites for download.

 This Python script is depended upon by another Python script entitled
 PyFrackETL.py, which is also available as a Gist under the same name.
 '''

 import collections
 import csv
 import ogr
 import os
 import pyodbc
 import shutil
 import tempfile
 import time
 import xlrd
 import zipfile

 # from dbfpy import dbf

 class Src2Tgt:
    
    # Lookup table keyed by the Unicode characters that Windows code page
    # 1252 places in the 0x80-0x9F byte range; each value is the one-character
    # string whose code point equals that byte value (u"\x80" .. u"\x9F").
    # NOTE(review): no method visible in this part of the file reads this
    # table -- confirm where (and in which direction) it is meant to be used.
    cp1252 = {
        # from http://www.microsoft.com/typography/unicode/1252.htm
        u"\u20AC": u"\x80", # EURO SIGN
        u"\u201A": u"\x82", # SINGLE LOW-9 QUOTATION MARK
        u"\u0192": u"\x83", # LATIN SMALL LETTER F WITH HOOK
        u"\u201E": u"\x84", # DOUBLE LOW-9 QUOTATION MARK
        u"\u2026": u"\x85", # HORIZONTAL ELLIPSIS
        u"\u2020": u"\x86", # DAGGER
        u"\u2021": u"\x87", # DOUBLE DAGGER
        u"\u02C6": u"\x88", # MODIFIER LETTER CIRCUMFLEX ACCENT
        u"\u2030": u"\x89", # PER MILLE SIGN
        u"\u0160": u"\x8A", # LATIN CAPITAL LETTER S WITH CARON
        u"\u2039": u"\x8B", # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
        u"\u0152": u"\x8C", # LATIN CAPITAL LIGATURE OE
        u"\u017D": u"\x8E", # LATIN CAPITAL LETTER Z WITH CARON
        u"\u2018": u"\x91", # LEFT SINGLE QUOTATION MARK
        u"\u2019": u"\x92", # RIGHT SINGLE QUOTATION MARK
        u"\u201C": u"\x93", # LEFT DOUBLE QUOTATION MARK
        u"\u201D": u"\x94", # RIGHT DOUBLE QUOTATION MARK
        u"\u2022": u"\x95", # BULLET
        u"\u2013": u"\x96", # EN DASH
        u"\u2014": u"\x97", # EM DASH
        u"\u02DC": u"\x98", # SMALL TILDE
        u"\u2122": u"\x99", # TRADE MARK SIGN
        u"\u0161": u"\x9A", # LATIN SMALL LETTER S WITH CARON
        u"\u203A": u"\x9B", # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
        u"\u0153": u"\x9C", # LATIN SMALL LIGATURE OE
        u"\u017E": u"\x9E", # LATIN SMALL LETTER Z WITH CARON
        u"\u0178": u"\x9F", # LATIN CAPITAL LETTER Y WITH DIAERESIS
    }

    def __init__(self):
        return       


    # -------------------------------------------------------------------------
    # define the CSV to KML file method
    # -------------------------------------------------------------------------
    
    def csv2kml(self, csvFile, tgtPath, kmlFile, maxRecords, flushCount):
        '''
        Convert a CSV file into a KML file through OGR.

        The first CSV row is the header.  Every column except the last
        becomes a KML attribute field; the last column of each row is
        handed to ogr.CreateGeometryFromGML to build the feature geometry.

        Parameters:
            csvFile    -- source CSV path (a leading '~' is expanded)
            tgtPath    -- folder used when kmlFile is '' or has no folder part
            kmlFile    -- target KML path; '' derives the name from csvFile
            maxRecords -- stop after this many rows; 0 or less means no limit
            flushCount -- print progress every flushCount rows; 0 or less
                          turns the progress lines off

        Returns None.  A missing or empty CSV file is reported on stdout
        and nothing is converted.
        '''

        print ("")
        print ("========================")
        print ("CSV to KML conversion...")
        print ("------------------------")

        ogr.UseExceptions()

        # expand any leading tilde
        # to the user's home path
        csvFile = os.path.expanduser(csvFile)
        kmlFile = os.path.expanduser(kmlFile)

        # verify that CSV file exists
        if not os.path.exists(csvFile):
            print ("CSV file '%s' does NOT exist!" % csvFile)
            return

        print ("csvFile = %s" % csvFile)
        if kmlFile == "":
            kmlFile = os.path.join(tgtPath, os.path.basename(csvFile).lower().replace('.csv','.kml').replace('.txt','.kml'))
        if os.path.dirname(kmlFile) == "":
            kmlFile = os.path.join(tgtPath, kmlFile)
        print ("kmlFile = %s" % kmlFile)

        # make sure the target folder exists (also when the KML name was
        # derived from the CSV name), creating it recursively if needed
        tgtFldr = os.path.dirname(kmlFile)
        if tgtFldr != "" and not os.path.exists(tgtFldr):
            os.makedirs(tgtFldr)

        rows = 0
        with open(csvFile, 'r', newline='') as fReader:
            csvreader = csv.reader(fReader)
            headers = next(csvreader, None)
            if headers is None:
                print ("CSV file '%s' is empty!" % csvFile)
                return

            # the geometry is assumed to be in the last column
            attrNames = headers[:-1]

            kmlDs = ogr.GetDriverByName('KML').CreateDataSource(kmlFile)
            kmlLyr = kmlDs.CreateLayer(os.path.splitext(os.path.basename(kmlFile))[0])

            for field in attrNames:
                kmlLyr.CreateField(ogr.FieldDefn(field))

            bgnTime = time.time()

            for rec in csvreader:
                feat = ogr.Feature(kmlLyr.GetLayerDefn())
                for i, field in enumerate(attrNames):
                    feat.SetField(field, rec[i])
                feat.SetGeometry(ogr.CreateGeometryFromGML(rec[-1]))
                kmlLyr.CreateFeature(feat)
                rows += 1
                if flushCount > 0 and rows % flushCount == 0:
                    elapsed = time.time() - bgnTime
                    rcdsPerSec = rows / elapsed if elapsed > 0 else 0
                    print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec))
                if maxRecords > 0 and rows >= maxRecords:
                    break

            # clean up: drop the OGR layer and data source references
            del kmlLyr, kmlDs

        print ("-----------------------------------")
        elapsed = time.time() - bgnTime
        rcdsPerSec = rows / elapsed if elapsed > 0 else 0
        print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec))
        print ("")
        return


    # -------------------------------------------------------------------------
    # define the DBF file to CSV file method
    # -------------------------------------------------------------------------
    def dbf2csv(self, dbfFile, tgtPath, csvFile, maxRecords, flushCount, csvDelimiter=','):
        '''
        Convert a DBF file into a delimited text (CSV) file.

        NOTE(review): the conversion step uses a module-level name `dbf`,
        but the only import for it ("from dbfpy import dbf") is commented
        out at the top of this file; until it is restored, a call with an
        existing DBF file raises NameError.

        Parameters:
            dbfFile      -- source DBF path (a leading '~' is expanded)
            tgtPath      -- folder used when csvFile is '' or has no folder part
            csvFile      -- target CSV path; '' derives the name from dbfFile
            maxRecords   -- stop after this many rows; 0 or less means no limit
            flushCount   -- flush and print progress every flushCount rows;
                            0 or less turns this off
            csvDelimiter -- field delimiter of the output file

        Returns None.  A missing DBF file is reported on stdout.
        '''

        print("")
        print("=============================")
        print("DBF to CSV file conversion...")
        print("-----------------------------")
        print("dbfFile '%s'" % dbfFile)
        print("csvFile '%s'" % csvFile)

        # expand any leading tilde
        # to the user's home path
        dbfFile = os.path.expanduser(dbfFile)
        csvFile = os.path.expanduser(csvFile)

        # verify that DBF file exists
        if not os.path.exists(dbfFile):
            print ("DBF file '%s' does NOT exist!" % dbfFile)
            return

        if csvFile == "":
            csvFile = os.path.join(tgtPath, os.path.basename(dbfFile).lower().replace('.dbf','.csv'))
        if os.path.dirname(csvFile) == "":
            csvFile = os.path.join(tgtPath, csvFile)
        print ("csvFile = %s" % csvFile)

        # make sure the CSV target folder exists (also when the CSV name
        # was derived from the DBF name), creating it recursively if needed
        tgtFldr = os.path.dirname(csvFile)
        if tgtFldr != "" and not os.path.exists(tgtFldr):
            os.makedirs(tgtFldr)

        rows = 0
        dbffile = dbf.Dbf(dbfFile)
        try:
            # NOTE(review): the header row is built from the entries of
            # header.fields as-is; confirm these render as column names
            colNames = list(dbffile.header.fields)
            with open(csvFile, 'w', newline='') as fWriter:
                csvwriter = csv.writer(fWriter, delimiter=csvDelimiter)
                csvwriter.writerow(colNames)

                bgnTime = time.time()

                for row in dbffile:
                    csvwriter.writerow(row)
                    rows += 1
                    if flushCount > 0 and rows % flushCount == 0:
                        fWriter.flush()
                        elapsed = time.time() - bgnTime
                        rcdsPerSec = rows / elapsed if elapsed > 0 else 0
                        print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec))
                    if maxRecords > 0 and rows >= maxRecords:
                        break
        finally:
            # clean up the DBF handle even when the conversion fails
            dbffile.close()

        print ("-----------------------------------")
        elapsed = time.time() - bgnTime
        rcdsPerSec = rows / elapsed if elapsed > 0 else 0
        print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec))
        print ("")
        return

        
    # -------------------------------------------------------------------------
    # define the KML to CSV file method
    # -------------------------------------------------------------------------
    
    def kml2csv(self, kmlFile, tgtPath, csvFile, maxRecords, flushCount):
        return

        
    # -------------------------------------------------------------------------
    # define the MDB to CSV file method
    # -------------------------------------------------------------------------
    
    def mdb2csv(self, mdbFile, tgtPath, csvFile, tblName, maxRecords, flushCount, csvDelimiter=','):
        '''
        Export one table of a Microsoft Access database to a CSV file.

        The database is opened through pyodbc with the
        "Microsoft Access Driver (*.mdb)" ODBC driver and the table is
        read with "select * from [tblName]".  The header row and the
        column order are taken from the result set itself.

        Parameters:
            mdbFile      -- source database path (a leading '~' is expanded)
            tgtPath      -- folder used when csvFile is '' or has no folder part
            csvFile      -- target CSV path; '' derives the name from mdbFile
            tblName      -- table to export; it is interpolated into the SQL
                            text, so it must come from a trusted source
            maxRecords   -- stop after this many rows; 0 or less means no limit
            flushCount   -- print progress every flushCount rows; 0 or less
                            turns the progress lines off
            csvDelimiter -- field delimiter of the output file

        Returns None.  A missing database file is reported on stdout.
        '''

        print("")
        print("=============================")
        print("MDB to CSV file conversion...")
        print("-----------------------------")
        print("mdbFile '%s'" % mdbFile)
        print("csvFile '%s'" % csvFile)
        print("tblName '%s'" % tblName)

        # expand any leading tilde
        # to the user's home path
        mdbFile = os.path.expanduser(mdbFile)
        csvFile = os.path.expanduser(csvFile)

        # verify that MDB file exists
        if not os.path.exists(mdbFile):
            print ("MDB file '%s' does NOT exist!" % mdbFile)
            return

        if csvFile == "":
            csvFile = os.path.join(tgtPath, os.path.basename(mdbFile).lower().replace('.mdb','.csv').replace('.accdb','.csv'))
        if os.path.dirname(csvFile) == "":
            csvFile = os.path.join(tgtPath, csvFile)
        print ("csvFile = %s" % csvFile)

        # make sure the CSV target folder exists (also when the CSV name
        # was derived from the MDB name), creating it recursively if needed
        tgtFldr = os.path.dirname(csvFile)
        if tgtFldr != "" and not os.path.exists(tgtFldr):
            os.makedirs(tgtFldr)

        bgnTime = time.time()

        mdbFile = os.path.abspath(mdbFile)
        connString = "Driver={Microsoft Access Driver (*.mdb)};DBQ=%s;" % mdbFile
        print ("pyODBC connString: %s" % connString)
        conn = pyodbc.connect(connString)

        rows = 0
        try:
            cursor = conn.cursor()
            # a table name cannot be bound as a query parameter
            cursor.execute("select * from [%s]" % tblName)

            # the first item of each description entry is the column name,
            # listed in the same order as the values of every fetched row
            colNames = [col[0] for col in cursor.description]

            with open(csvFile, 'w', newline='') as fWriter:
                csvwriter = csv.writer(fWriter, delimiter=csvDelimiter)
                csvwriter.writerow(colNames)

                for row in cursor:
                    csvwriter.writerow(list(row))
                    rows += 1
                    if flushCount > 0 and rows % flushCount == 0:
                        elapsed = time.time() - bgnTime
                        rcdsPerSec = rows / elapsed if elapsed > 0 else 0
                        print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec))
                    if maxRecords > 0 and rows >= maxRecords:
                        break
        finally:
            # release the connection even when the export fails
            conn.close()

        print ("-----------------------------------")
        elapsed = time.time() - bgnTime
        rcdsPerSec = rows / elapsed if elapsed > 0 else 0
        print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec))
        print ("")
        return

        
    # -------------------------------------------------------------------------
    # define the PDF to CSV file method
    # -------------------------------------------------------------------------
    
    def pdf2csv(self, pdfFile, tgtPath, csvFile, maxRecords, flushCount):
        return
    
    
    # -------------------------------------------------------------------------
    # define the SHP to CSV file method
    # -------------------------------------------------------------------------
    
    def shp2csv(self, shpFile, tgtPath, csvFile, maxRecords, flushCount):
        '''
        Export the features of a shapefile to a CSV file.

        The CSV gets one column per shapefile attribute field plus a
        final 'kmlgeometry' column holding the geometry exported as KML
        (empty when the geometry cannot be exported).

        Parameters:
            shpFile    -- source path; must exist and end with '.shp'
                          (a leading '~' is expanded)
            tgtPath    -- folder used when csvFile is '' or has no folder part
            csvFile    -- target CSV path; '' derives the name from shpFile
            maxRecords -- stop after this many features; 0 or less means no limit
            flushCount -- flush and print progress every flushCount features;
                          0 or less turns this off

        Returns None.  A missing, mis-named or unreadable shapefile is
        reported on stdout and nothing is written.
        '''

        print ("")
        print ("========================")
        print ("SHP to CSV conversion...")
        print ("------------------------")

        # expand any leading tilde
        # to the user's home path
        shpFile = os.path.expanduser(shpFile)
        csvFile = os.path.expanduser(csvFile)

        # verify that SHP file exists
        if not os.path.exists(shpFile):
            print ("SHP file '%s' does NOT exist!" % shpFile)
            return
        elif not shpFile.lower().endswith(".shp"):
            print ("SHP file name '%s' does NOT end with '.shp'!" % shpFile)
            return

        print ("shpFile = %s" % shpFile)
        if csvFile == "":
            csvFile = os.path.join(tgtPath, os.path.basename(shpFile).lower().replace('.shp','.csv'))
        if os.path.dirname(csvFile) == "":
            csvFile = os.path.join(tgtPath, csvFile)
        print ("csvFile = %s" % csvFile)

        # open the source first so that a failure leaves no empty CSV behind
        shpDs = ogr.Open(shpFile)
        if shpDs is None:
            print ("SHP file '%s' could NOT be opened!" % shpFile)
            return
        shpLayer = shpDs.GetLayer()

        # Get field names
        shpDfn = shpLayer.GetLayerDefn()
        headers = [shpDfn.GetFieldDefn(i).GetName() for i in range(shpDfn.GetFieldCount())]
        headers.append('kmlgeometry')

        # make sure the target folder exists (also when the CSV name was
        # derived from the SHP name), creating it recursively if needed
        tgtFldr = os.path.dirname(csvFile)
        if tgtFldr != "" and not os.path.exists(tgtFldr):
            os.makedirs(tgtFldr)

        rows = 0
        with open(csvFile, 'w', newline='') as csvfile:
            csvwriter = csv.DictWriter(csvfile, headers)
            csvwriter.writeheader()

            bgnTime = time.time()

            # Write attributes and kml geometry out to csv
            for shpFeat in shpLayer:
                try:
                    attributes = shpFeat.items()
                    shpGeom = shpFeat.GetGeometryRef()
                    try:
                        attributes['kmlgeometry'] = shpGeom.ExportToKML()
                    except AttributeError as e:
                        # no geometry object to export: keep the row,
                        # leave the geometry cell empty
                        attributes['kmlgeometry'] = ""
                        print (e)
                    csvwriter.writerow(attributes)
                except UnicodeEncodeError as e:
                    # the row cannot be encoded for the output file; it is
                    # reported and skipped but still counted
                    print (e)
                rows += 1
                if flushCount > 0 and rows % flushCount == 0:
                    csvfile.flush()
                    elapsed = time.time() - bgnTime
                    rcdsPerSec = rows / elapsed if elapsed > 0 else 0
                    print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec))
                if maxRecords > 0 and rows >= maxRecords:
                    break

        # clean up the OGR references
        del shpLayer, shpDs

        print ("-----------------------------------")
        elapsed = time.time() - bgnTime
        rcdsPerSec = rows / elapsed if elapsed > 0 else 0
        print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec))
        print ("")
        return
    
    
    # -------------------------------------------------------------------------
    # define the SHP to KML file method
    # -------------------------------------------------------------------------
    
    def shp2kml(self, shpFile, tgtPath, kmlFile, maxRecords, flushCount):
        '''
        Convert a shapefile into a KML file through OGR.

        Every shapefile attribute field is recreated in the KML layer.
        Each geometry is exported to KML text and parsed back with
        ogr.CreateGeometryFromGML before it is attached to the new
        feature; a feature without geometry is written with its
        attributes only.

        Parameters:
            shpFile    -- source path; must exist and end with '.shp'
                          (a leading '~' is expanded)
            tgtPath    -- folder used when kmlFile is '' or has no folder part
            kmlFile    -- target KML path; '' derives the name from shpFile
            maxRecords -- stop after this many features; 0 or less means no limit
            flushCount -- print progress every flushCount features; 0 or
                          less turns the progress lines off

        Returns None.  A missing, mis-named or unreadable shapefile is
        reported on stdout and nothing is written.
        '''

        print ("")
        print ("========================")
        print ("SHP to KML conversion...")
        print ("------------------------")

        # expand any leading tilde
        # to the user's home path
        shpFile = os.path.expanduser(shpFile)
        kmlFile = os.path.expanduser(kmlFile)

        # verify that SHP file exists
        if not os.path.exists(shpFile):
            print ("SHP file '%s' does NOT exist!" % shpFile)
            return
        elif not shpFile.lower().endswith(".shp"):
            print ("SHP file name '%s' does NOT end with '.shp'!" % shpFile)
            return

        print ("shpFile = %s" % shpFile)
        if kmlFile == "":
            kmlFile = os.path.join(tgtPath, os.path.basename(shpFile).lower().replace('.shp','.kml'))
        if os.path.dirname(kmlFile) == "":
            kmlFile = os.path.join(tgtPath, kmlFile)
        print ("kmlFile = %s" % kmlFile)

        # Open the source
        shpDs = ogr.Open(shpFile)
        if shpDs is None:
            print ("SHP file '%s' could NOT be opened!" % shpFile)
            return
        shpLayer = shpDs.GetLayer()

        # make sure the KML target folder exists (also when the KML name
        # was derived from the SHP name), creating it recursively if needed
        tgtFldr = os.path.dirname(kmlFile)
        if tgtFldr != "" and not os.path.exists(tgtFldr):
            os.makedirs(tgtFldr)

        kmlDs = ogr.GetDriverByName('KML').CreateDataSource(kmlFile)
        kmlLayer = kmlDs.CreateLayer(os.path.splitext(os.path.basename(kmlFile))[0])

        # Get field names and mirror each field in the KML layer
        shpDfn = shpLayer.GetLayerDefn()
        fieldNames = []
        for i in range(shpDfn.GetFieldCount()):
            field = shpDfn.GetFieldDefn(i).GetName()
            fieldNames.append(field)
            kmlLayer.CreateField(ogr.FieldDefn(field))

        bgnTime = time.time()

        # Write attributes and geometry out to kml
        rows = 0
        for shpFeat in shpLayer:
            attributes = shpFeat.items()
            shpGeom = shpFeat.GetGeometryRef()
            kmlFeat = ogr.Feature(kmlLayer.GetLayerDefn())
            for field in fieldNames:
                kmlFeat.SetField(field, attributes[field])
            # a feature may have no geometry; keep its attributes anyway
            if shpGeom is not None:
                kmlFeat.SetGeometry(ogr.CreateGeometryFromGML(shpGeom.ExportToKML()))
            kmlLayer.CreateFeature(kmlFeat)
            rows += 1
            if flushCount > 0 and rows % flushCount == 0:
                elapsed = time.time() - bgnTime
                rcdsPerSec = rows / elapsed if elapsed > 0 else 0
                print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec))
            if maxRecords > 0 and rows >= maxRecords:
                break

        # clean up the OGR references
        del shpLayer, shpDs, kmlLayer, kmlDs

        print ("-----------------------------------")
        elapsed = time.time() - bgnTime
        rcdsPerSec = rows / elapsed if elapsed > 0 else 0
        print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec))
        print ("")
        return
    
    
    # -------------------------------------------------------------------------
    # define the SHP to KML and CSV files method
    # -------------------------------------------------------------------------
    def shp2kmlcsv(self, shpFile, tgtPath, kmlFile, csvFile, maxRecords, flushCount):
        '''
        Convert a shapefile into both a KML file and a CSV file in one pass.

        The CSV gets one column per attribute field plus a final
        'kmlgeometry' column with the geometry exported as KML text.  The
        KML layer gets the same attribute fields; its geometry is rebuilt
        from that KML text with ogr.CreateGeometryFromGML.  A feature
        without geometry is written with an empty 'kmlgeometry' cell and
        no KML geometry.

        Parameters:
            shpFile    -- source path; must exist and end with '.shp'
                          (a leading '~' is expanded)
            tgtPath    -- folder used when a target is '' or has no folder part
            kmlFile    -- target KML path; '' derives the name from shpFile
            csvFile    -- target CSV path; '' derives the name from shpFile
            maxRecords -- stop after this many features; 0 or less means no limit
            flushCount -- flush and print progress every flushCount features;
                          0 or less turns this off

        Returns None.  A missing, mis-named or unreadable shapefile is
        reported on stdout and nothing is written.
        '''

        print ("")
        print ("================================")
        print ("SHP to KML and CSV conversion...")
        print ("--------------------------------")

        # expand any leading tilde
        # to the user's home path
        shpFile = os.path.expanduser(shpFile)
        kmlFile = os.path.expanduser(kmlFile)
        csvFile = os.path.expanduser(csvFile)

        # verify that SHP file exists
        if not os.path.exists(shpFile):
            print ("SHP file '%s' does NOT exist!" % shpFile)
            return
        elif not shpFile.lower().endswith(".shp"):
            print ("SHP file name '%s' does NOT end with '.shp'!" % shpFile)
            return

        print ("shpFile = %s" % shpFile)
        if kmlFile == "":
            kmlFile = os.path.join(tgtPath, os.path.basename(shpFile).lower().replace('.shp','.kml'))
        if os.path.dirname(kmlFile) == "":
            kmlFile = os.path.join(tgtPath, kmlFile)
        print ("kmlFile = %s" % kmlFile)
        if csvFile == "":
            csvFile = os.path.join(tgtPath, os.path.basename(shpFile).lower().replace('.shp','.csv'))
        if os.path.dirname(csvFile) == "":
            csvFile = os.path.join(tgtPath, csvFile)
        print ("csvFile = %s" % csvFile)

        # open the source first so that a failure leaves no empty outputs
        shpDs = ogr.Open(shpFile)
        if shpDs is None:
            print ("SHP file '%s' could NOT be opened!" % shpFile)
            return
        shpLayer = shpDs.GetLayer()

        # make sure both target folders exist (also when a target name was
        # derived from the SHP name), creating them recursively if needed
        for tgtFldr in (os.path.dirname(kmlFile), os.path.dirname(csvFile)):
            if tgtFldr != "" and not os.path.exists(tgtFldr):
                os.makedirs(tgtFldr)

        kmlDs = ogr.GetDriverByName('KML').CreateDataSource(kmlFile)
        kmlLayer = kmlDs.CreateLayer(os.path.splitext(os.path.basename(kmlFile))[0])

        # Get field names and mirror each field in the KML layer
        shpDfn = shpLayer.GetLayerDefn()
        fieldNames = []
        for i in range(shpDfn.GetFieldCount()):
            field = shpDfn.GetFieldDefn(i).GetName()
            fieldNames.append(field)
            kmlLayer.CreateField(ogr.FieldDefn(field))

        rows = 0
        with open(csvFile, 'w', newline='') as csvfile:
            csvwriter = csv.DictWriter(csvfile, fieldNames + ['kmlgeometry'])
            csvwriter.writeheader()

            bgnTime = time.time()

            # Write attributes and kml out to csv and kml
            for shpFeat in shpLayer:
                attributes = shpFeat.items()
                shpGeom = shpFeat.GetGeometryRef()
                # a feature may have no geometry; keep its attributes anyway
                if shpGeom is None:
                    attributes['kmlgeometry'] = ""
                else:
                    attributes['kmlgeometry'] = shpGeom.ExportToKML()
                csvwriter.writerow(attributes)
                kmlFeat = ogr.Feature(kmlLayer.GetLayerDefn())
                for field in fieldNames:
                    kmlFeat.SetField(field, attributes[field])
                if shpGeom is not None:
                    kmlFeat.SetGeometry(ogr.CreateGeometryFromGML(attributes['kmlgeometry']))
                kmlLayer.CreateFeature(kmlFeat)
                rows += 1
                if flushCount > 0 and rows % flushCount == 0:
                    csvfile.flush()
                    elapsed = time.time() - bgnTime
                    rcdsPerSec = rows / elapsed if elapsed > 0 else 0
                    print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec))
                if maxRecords > 0 and rows >= maxRecords:
                    break

        # clean up the OGR references
        del shpLayer, shpDs, kmlLayer, kmlDs

        print ("-----------------------------------")
        elapsed = time.time() - bgnTime
        rcdsPerSec = rows / elapsed if elapsed > 0 else 0
        print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec))
        print ("")
        return


    # -------------------------------------------------------------------------
    # define the TAB-delimited file to CSV file method
    # -------------------------------------------------------------------------
    def tab2csv(self, tabFile, tgtPath, csvFile, maxRecords, flushCount, tabDelimiter='\t', csvDelimiter=','):
        '''
        Convert a TAB-delimited text file into a CSV file.

        The first line supplies the column names.  Every value is
        stripped and its inner runs of whitespace are collapsed to single
        spaces.  A row with fewer values than the header gets empty
        strings for the missing columns (the lookup error is printed).

        Parameters:
            tabFile      -- source path (a leading '~' is expanded)
            tgtPath      -- folder used when csvFile is '' or has no folder part
            csvFile      -- target CSV path; '' derives the name from tabFile
            maxRecords   -- stop after this many rows; 0 or less means no limit
            flushCount   -- flush and print progress every flushCount rows;
                            0 or less turns this off
            tabDelimiter -- field delimiter of the input file
            csvDelimiter -- field delimiter of the output file

        Returns None.  A missing or empty input file is reported on
        stdout and no output file is written.
        '''

        print("")
        print("=============================")
        print("TAB to CSV file conversion...")
        print("-----------------------------")
        print("tabFile '%s'" % tabFile)
        print("csvFile '%s'" % csvFile)

        # expand any leading tilde
        # to the user's home path
        tabFile = os.path.expanduser(tabFile)
        csvFile = os.path.expanduser(csvFile)

        # verify that TAB file exists
        if not os.path.exists(tabFile):
            print ("TAB file '%s' does NOT exist!" % tabFile)
            return

        if csvFile == "":
            csvFile = os.path.join(tgtPath, os.path.basename(tabFile).lower().replace('.tab','.csv').replace('.txt','.csv'))
        if os.path.dirname(csvFile) == "":
            csvFile = os.path.join(tgtPath, csvFile)
        print ("csvFile = %s" % csvFile)

        # make sure the CSV target folder exists (also when the CSV name
        # was derived from the TAB name), creating it recursively if needed
        tgtFldr = os.path.dirname(csvFile)
        if tgtFldr != "" and not os.path.exists(tgtFldr):
            os.makedirs(tgtFldr)

        rows = 0
        with open(tabFile, 'r', newline='') as fReader:
            csvreader = csv.DictReader(fReader, delimiter=tabDelimiter)
            fieldNames = csvreader.fieldnames
            # fieldnames is None when the file holds no line at all
            if fieldNames is None:
                print ("TAB file '%s' is empty!" % tabFile)
                return

            with open(csvFile, 'w', newline='') as fWriter:
                csvwriter = csv.DictWriter(fWriter, delimiter=csvDelimiter, fieldnames=fieldNames)
                csvwriter.writeheader()

                bgnTime = time.time()

                cols = dict()
                for rowDict in csvreader:
                    cols.clear()
                    for colName in fieldNames:
                        try:
                            cols[colName] = ' '.join(rowDict[colName].strip().split())
                        except AttributeError as e:
                            # short row: DictReader supplies None for the
                            # missing columns
                            cols[colName] = ''
                            print (e)
                    csvwriter.writerow(cols)
                    rows += 1
                    if flushCount > 0 and rows % flushCount == 0:
                        fWriter.flush()
                        elapsed = time.time() - bgnTime
                        rcdsPerSec = rows / elapsed if elapsed > 0 else 0
                        print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec))
                    if maxRecords > 0 and rows >= maxRecords:
                        break

        print ("-----------------------------------")
        elapsed = time.time() - bgnTime
        rcdsPerSec = rows / elapsed if elapsed > 0 else 0
        print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec))
        print ("")
        return


    # -------------------------------------------------------------------------
    # define the TXT file to CSV file method
    # -------------------------------------------------------------------------
    def txt2csv(self, txtFile, tgtPath, csvFile, maxRecords, flushCount, txtDelimiter=',', csvDelimiter=','):
        '''
        Convert a delimited TXT file into a CSV file.

        The first line supplies the column names.  Every value is
        stripped and its inner runs of whitespace are collapsed to single
        spaces.  A row with fewer values than the header gets empty
        strings for the missing columns (the lookup error is printed).

        Parameters:
            txtFile      -- source path (a leading '~' is expanded)
            tgtPath      -- folder used when csvFile is '' or has no folder part
            csvFile      -- target CSV path; '' derives the name from txtFile
            maxRecords   -- stop after this many rows; 0 or less means no limit
            flushCount   -- flush and print progress every flushCount rows;
                            0 or less turns this off
            txtDelimiter -- field delimiter of the input file
            csvDelimiter -- field delimiter of the output file

        Returns None.  A missing or empty input file is reported on
        stdout and no output file is written.
        '''

        print("")
        print("=============================")
        print("TXT to CSV file conversion...")
        print("-----------------------------")
        print("txtFile '%s'" % txtFile)
        print("csvFile '%s'" % csvFile)

        # expand any leading tilde
        # to the user's home path
        txtFile = os.path.expanduser(txtFile)
        csvFile = os.path.expanduser(csvFile)

        # verify that TXT file exists
        if not os.path.exists(txtFile):
            print ("TXT file '%s' does NOT exist!" % txtFile)
            return

        if csvFile == "":
            csvFile = os.path.join(tgtPath, os.path.basename(txtFile).lower().replace('.txt','.csv'))
        if os.path.dirname(csvFile) == "":
            csvFile = os.path.join(tgtPath, csvFile)
        print ("csvFile = %s" % csvFile)

        # make sure the CSV target folder exists (also when the CSV name
        # was derived from the TXT name), creating it recursively if needed
        tgtFldr = os.path.dirname(csvFile)
        if tgtFldr != "" and not os.path.exists(tgtFldr):
            os.makedirs(tgtFldr)

        rows = 0
        with open(txtFile, 'r', newline='') as fReader:
            csvreader = csv.DictReader(fReader, delimiter=txtDelimiter)
            fieldNames = csvreader.fieldnames
            # fieldnames is None when the file holds no line at all
            if fieldNames is None:
                print ("TXT file '%s' is empty!" % txtFile)
                return

            with open(csvFile, 'w', newline='') as fWriter:
                csvwriter = csv.DictWriter(fWriter, delimiter=csvDelimiter, fieldnames=fieldNames)
                csvwriter.writeheader()

                bgnTime = time.time()

                cols = dict()
                for rowDict in csvreader:
                    cols.clear()
                    for colName in fieldNames:
                        try:
                            cols[colName] = ' '.join(rowDict[colName].strip().split())
                        except AttributeError as e:
                            # short row: DictReader supplies None for the
                            # missing columns
                            cols[colName] = ''
                            print (e)
                    csvwriter.writerow(cols)
                    rows += 1
                    if flushCount > 0 and rows % flushCount == 0:
                        fWriter.flush()
                        elapsed = time.time() - bgnTime
                        rcdsPerSec = rows / elapsed if elapsed > 0 else 0
                        print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec))
                    if maxRecords > 0 and rows >= maxRecords:
                        break

        print ("-----------------------------------")
        elapsed = time.time() - bgnTime
        rcdsPerSec = rows / elapsed if elapsed > 0 else 0
        print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec))
        print ("")
        return

        
    # -------------------------------------------------------------------------
    # define the XLS to CSV file method
    # -------------------------------------------------------------------------
    
    def xls2csv(self, xlsFile, tgtPath, csvFile, maxRecords, flushCount):
        '''
        Export the first sheet of an Excel workbook to a CSV file.

        The workbook is read with xlrd; every cell value is converted
        with str() and stripped before it is written.

        Parameters:
            xlsFile    -- source workbook path (a leading '~' is expanded)
            tgtPath    -- folder used when csvFile is '' or has no folder part
            csvFile    -- target CSV path; '' uses the lower-cased workbook
                          name with its extension replaced by '.csv'
            maxRecords -- stop after this many rows; 0 or less means no limit
            flushCount -- flush and print progress every flushCount rows;
                          0 or less turns this off

        Returns None.  A missing workbook is reported on stdout.
        '''

        print("")
        print("=============================")
        print("XLS to CSV file conversion...")
        print("-----------------------------")
        print("xlsFile '%s'" % xlsFile)
        print("csvFile '%s'" % csvFile)

        # expand any leading tilde
        # to the user's home path
        xlsFile = os.path.expanduser(xlsFile)
        csvFile = os.path.expanduser(csvFile)

        # verify that XLS file exists
        if not os.path.exists(xlsFile):
            print ("XLS file '%s' does NOT exist!" % xlsFile)
            return

        if csvFile == "":
            # swap the whole extension, so 'x.xlsx' becomes 'x.csv'
            # rather than 'x.csvx'
            baseName = os.path.splitext(os.path.basename(xlsFile).lower())[0]
            csvFile = os.path.join(tgtPath, baseName + '.csv')
        if os.path.dirname(csvFile) == "":
            csvFile = os.path.join(tgtPath, csvFile)
        print ("csvFile = %s" % csvFile)

        # make sure the CSV target folder exists (also when the CSV name
        # was derived from the XLS name), creating it recursively if needed
        tgtFldr = os.path.dirname(csvFile)
        if tgtFldr != "" and not os.path.exists(tgtFldr):
            os.makedirs(tgtFldr)

        xlsWB = xlrd.open_workbook(xlsFile)
        xlsSH = xlsWB.sheet_by_index(0)

        rows = 0
        with open(csvFile, 'w', newline='') as fWriter:
            csvwriter = csv.writer(fWriter, delimiter=',')

            bgnTime = time.time()

            for rowNbr in range(xlsSH.nrows):
                values = [str(entry).strip() for entry in xlsSH.row_values(rowNbr)]
                try:
                    csvwriter.writerow(values)
                except UnicodeEncodeError as e:
                    # the row cannot be encoded for the output file; it is
                    # reported and skipped but still counted
                    print (e)
                rows += 1
                if flushCount > 0 and rows % flushCount == 0:
                    fWriter.flush()
                    elapsed = time.time() - bgnTime
                    rcdsPerSec = rows / elapsed if elapsed > 0 else 0
                    print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec))
                if maxRecords > 0 and rows >= maxRecords:
                    break

        print ("-----------------------------------")
        elapsed = time.time() - bgnTime
        rcdsPerSec = rows / elapsed if elapsed > 0 else 0
        print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec))
        print ("")
        return

        
    # -------------------------------------------------------------------------
    # define the unZIP a file to a target folder method
    # -------------------------------------------------------------------------
    def zip2dir(self, zipFile, tgtPath):
        """Extract every member of a ZIP archive into a target folder.

        zipFile -- path of the ZIP archive (a leading '~' is expanded)
        tgtPath -- folder to extract into (a leading '~' is expanded);
                   it is created recursively when it does not exist

        Returns None.  When the archive does not exist, a message is
        printed and nothing is created or extracted.
        """
        # announce the operation together with the arguments as given
        for bannerLine in ("",
                           "=================================",
                           "ZIP to TGT directory expansion...",
                           "---------------------------------"):
            print(bannerLine)
        print("zipFile '%s'" % zipFile)
        print("tgtPath '%s'" % tgtPath)

        # resolve a leading tilde to the user's home folder
        archivePath = os.path.expanduser(zipFile)
        outputDir = os.path.expanduser(tgtPath)

        if os.path.exists(archivePath):
            # build the output folder (and any parents) on demand
            if not os.path.exists(outputDir):
                os.makedirs(outputDir)
            # unpack all files/folders held by the archive
            with zipfile.ZipFile(archivePath, "r") as archive:
                archive.extractall(outputDir)
        else:
            print ("ZIP file '%s' does NOT exist!" % archivePath)

        return

            
    # -------------------------------------------------------------------------
    # define the ZIP file to a CSV file and then to a CSV file method
    # -------------------------------------------------------------------------
    def zip2csv2csv(self, zipFile, tgtPath, csvFile, maxRecords, flushCount, unzPath, srcDelimiter=',', csvDelimiter=','):
        """Unzip a ZIP archive and convert the first CSV file found in it.

        The archive is expanded into the unzip folder via self.zip2dir;
        the first entry listed in that folder whose name ends with ".csv"
        (compared case-insensitively) is handed to self.txt2csv.  Only
        the top level of the unzip folder is searched.

        zipFile      -- path of the ZIP archive ('~' is expanded)
        tgtPath      -- output folder ('~' is expanded), created when
                        missing; when empty a message is printed and
                        nothing else is done
        csvFile      -- output CSV file; when empty it defaults to the
                        lower-cased source file name, and a bare file
                        name is placed inside tgtPath
        maxRecords   -- passed through unchanged to self.txt2csv
        flushCount   -- passed through unchanged to self.txt2csv
        unzPath      -- folder to unzip into ('~' is expanded); when
                        empty a temporary folder is generated and is
                        removed again before this method returns
        srcDelimiter -- passed through unchanged to self.txt2csv
        csvDelimiter -- passed through unchanged to self.txt2csv

        Returns None.
        """
        # expand any leading tilde
        # to the user's home path
        zipFile = os.path.expanduser(zipFile)
        tgtPath = os.path.expanduser(tgtPath)
        unzPath = os.path.expanduser(unzPath)

        # verify that ZIP file exists
        if not os.path.exists(zipFile):
            print ("ZIP file '%s' does NOT exist!" % zipFile)
            return

        # without a target folder there is nothing to do
        if tgtPath == "":
            print ("Target folder was NOT specified!")
            return

        print ("Target folder '%s'was specified." % tgtPath)
        # make sure the target folder exists,
        # creating it recursively if it does not
        if not os.path.exists(tgtPath):
            os.makedirs(tgtPath)

        # if UNZIP folder was NOT specified,
        # generate a temporary UNZIP folder
        # to be removed after processing
        rmvFldr = False
        if unzPath == "":
            unzPath = tempfile.mkdtemp()
            rmvFldr = True
            print ("Unzip folder '%s' was generated." % unzPath)
        else:
            print ("Unzip folder '%s' was specified." % unzPath)

        # try/finally guarantees the generated folder is removed on
        # every exit path: normal end, "no CSV found", or an exception
        try:
            if not os.path.exists(unzPath):
                os.makedirs(unzPath)
            # unzip the ZIP file to the UNZIP folder
            self.zip2dir(zipFile, unzPath)
            # find the first CSV file in the UNZIP folder
            tmpFile = ""
            for aFile in os.listdir(unzPath):
                if aFile.lower().endswith(".csv"):
                    tmpFile = os.path.join(unzPath, aFile)
                    break
            # verify that a CSV file was found
            if tmpFile == "":
                print ("Zip file '%s' did NOT contain a CSV file!" % zipFile)
                return
            print ("zipFile = %s" % zipFile)
            print ("srcFile = %s" % tmpFile)
            if csvFile == "":
                csvFile = os.path.join(tgtPath, os.path.basename(tmpFile).lower())
            if os.path.dirname(csvFile) == "":
                csvFile = os.path.join(tgtPath, csvFile)
            print ("csvFile = %s" % csvFile)
            # create the CSV file from the SRC file
            self.txt2csv(tmpFile, tgtPath, csvFile, maxRecords, flushCount, srcDelimiter, csvDelimiter)
        finally:
            # remove the UNZIP folder only when this method generated it
            if rmvFldr:
                shutil.rmtree(unzPath)

        return


    # -------------------------------------------------------------------------
    # define the ZIP file to a DBF file, then to a CSV file method
    # -------------------------------------------------------------------------
    def zip2dbf2csv(self, zipFile, tgtPath, csvFile, maxRecords, flushCount, unzPath, csvDelimiter=','):
        """Unzip a ZIP archive and convert every DBF file found in it.

        The archive is expanded into the unzip folder via self.zip2dir;
        each top-level entry of that folder whose name ends with ".dbf"
        (compared case-insensitively) is handed to self.dbf2csv.

        zipFile      -- path of the ZIP archive ('~' is expanded)
        tgtPath      -- output folder ('~' is expanded), created when
                        missing; when empty a message is printed and
                        nothing else is done
        csvFile      -- output CSV file; when empty, each DBF file gets
                        its own output named after its lower-cased file
                        name with a ".csv" extension.  When given, it is
                        used for every DBF file found (a bare file name
                        is placed inside tgtPath).
        maxRecords   -- passed through unchanged to self.dbf2csv
        flushCount   -- passed through unchanged to self.dbf2csv
        unzPath      -- folder to unzip into ('~' is expanded); when
                        empty a temporary folder is generated and is
                        removed again before this method returns
        csvDelimiter -- passed through unchanged to self.dbf2csv

        Returns None.
        """
        # expand any leading tilde
        # to the user's home path
        zipFile = os.path.expanduser(zipFile)
        tgtPath = os.path.expanduser(tgtPath)
        unzPath = os.path.expanduser(unzPath)

        # verify that ZIP file exists
        if not os.path.exists(zipFile):
            print ("ZIP file '%s' does NOT exist!" % zipFile)
            return

        # without a target folder there is nothing to do
        if tgtPath == "":
            print ("Target folder was NOT specified!")
            return

        print ("Target folder '%s'was specified." % tgtPath)
        # make sure the target folder exists,
        # creating it recursively if it does not
        if not os.path.exists(tgtPath):
            os.makedirs(tgtPath)

        # if UNZIP folder was NOT specified,
        # generate a temporary UNZIP folder
        # to be removed after processing
        rmvFldr = False
        if unzPath == "":
            unzPath = tempfile.mkdtemp()
            rmvFldr = True
            print ("Unzip folder '%s' was generated." % unzPath)
        else:
            print ("Unzip folder '%s' was specified." % unzPath)

        # try/finally guarantees the generated folder is removed
        # even when the unzip or the conversion raises
        try:
            if not os.path.exists(unzPath):
                os.makedirs(unzPath)
            # unzip the ZIP file to the UNZIP folder
            self.zip2dir(zipFile, unzPath)
            # convert each DBF file in the UNZIP folder
            foundAny = False
            for aFile in os.listdir(unzPath):
                if not aFile.lower().endswith(".dbf"):
                    continue
                foundAny = True
                dbfFile = os.path.join(unzPath, aFile)
                print ("zipFile = %s" % zipFile)
                print ("dbfFile = %s" % dbfFile)
                # derive the default name per source file, so several
                # DBF files in one archive do not all end up in the
                # CSV file named after the first one
                if csvFile == "":
                    outFile = os.path.splitext(aFile.lower())[0] + ".csv"
                else:
                    outFile = csvFile
                if os.path.dirname(outFile) == "":
                    outFile = os.path.join(tgtPath, outFile)
                print ("csvFile = %s" % outFile)
                # create the CSV file from the DBF file
                self.dbf2csv(dbfFile, tgtPath, outFile, maxRecords, flushCount, csvDelimiter)
            # report when no DBF file was found
            if not foundAny:
                print ("Zip file '%s' did NOT contain a DBF file!" % zipFile)
        finally:
            # remove the UNZIP folder only when this method generated it
            if rmvFldr:
                shutil.rmtree(unzPath)

        return



    # -------------------------------------------------------------------------
    # define the ZIP file to a MDB file, then to a CSV file method
    # -------------------------------------------------------------------------
    def zip2mdb2csv(self, zipFile, tgtPath, csvFile, tblName, maxRecords, flushCount, unzPath, csvDelimiter=','):
        """Unzip a ZIP archive and convert every MDB/ACCDB file found in it.

        The archive is expanded into the unzip folder via self.zip2dir;
        each top-level entry of that folder whose name ends with ".mdb"
        or ".accdb" (compared case-insensitively) is handed to
        self.mdb2csv.

        zipFile      -- path of the ZIP archive ('~' is expanded)
        tgtPath      -- output folder ('~' is expanded), created when
                        missing; when empty a message is printed and
                        nothing else is done
        csvFile      -- output CSV file; when empty, each database file
                        gets its own output named after its lower-cased
                        file name with a ".csv" extension.  When given,
                        it is used for every database file found (a bare
                        file name is placed inside tgtPath).
        tblName      -- passed through unchanged to self.mdb2csv
        maxRecords   -- passed through unchanged to self.mdb2csv
        flushCount   -- passed through unchanged to self.mdb2csv
        unzPath      -- folder to unzip into ('~' is expanded); when
                        empty a temporary folder is generated and is
                        removed again before this method returns
        csvDelimiter -- passed through unchanged to self.mdb2csv

        Returns None.
        """
        # expand any leading tilde
        # to the user's home path
        zipFile = os.path.expanduser(zipFile)
        tgtPath = os.path.expanduser(tgtPath)
        unzPath = os.path.expanduser(unzPath)

        # verify that ZIP file exists
        if not os.path.exists(zipFile):
            print ("ZIP file '%s' does NOT exist!" % zipFile)
            return

        # without a target folder there is nothing to do
        if tgtPath == "":
            print ("Target folder was NOT specified!")
            return

        print ("Target folder '%s'was specified." % tgtPath)
        # make sure the target folder exists,
        # creating it recursively if it does not
        if not os.path.exists(tgtPath):
            os.makedirs(tgtPath)

        # if UNZIP folder was NOT specified,
        # generate a temporary UNZIP folder
        # to be removed after processing
        rmvFldr = False
        if unzPath == "":
            unzPath = tempfile.mkdtemp()
            rmvFldr = True
            print ("Unzip folder '%s' was generated." % unzPath)
        else:
            print ("Unzip folder '%s' was specified." % unzPath)

        # try/finally guarantees the generated folder is removed
        # even when the unzip or the conversion raises
        try:
            if not os.path.exists(unzPath):
                os.makedirs(unzPath)
            # unzip the ZIP file to the UNZIP folder
            self.zip2dir(zipFile, unzPath)
            # convert each MDB or ACCDB file in the UNZIP folder
            foundAny = False
            for aFile in os.listdir(unzPath):
                if not aFile.lower().endswith((".mdb", ".accdb")):
                    continue
                foundAny = True
                mdbFile = os.path.join(unzPath, aFile)
                print ("zipFile = %s" % zipFile)
                print ("mdbFile = %s" % mdbFile)
                # derive the default name per source file, so several
                # databases in one archive do not all end up in the
                # CSV file named after the first one
                if csvFile == "":
                    outFile = os.path.splitext(aFile.lower())[0] + ".csv"
                else:
                    outFile = csvFile
                if os.path.dirname(outFile) == "":
                    outFile = os.path.join(tgtPath, outFile)
                print ("csvFile = %s" % outFile)
                # create the CSV file from the MDB file
                self.mdb2csv(mdbFile, tgtPath, outFile, tblName, maxRecords, flushCount, csvDelimiter)
            # report when no MDB or ACCDB file was found
            if not foundAny:
                print ("Zip file '%s' did NOT contain a MDB or ACCDB file!" % zipFile)
        finally:
            # remove the UNZIP folder only when this method generated it
            if rmvFldr:
                shutil.rmtree(unzPath)

        return

            
    # -------------------------------------------------------------------------
    # define the ZIP file to a SHP file, then to CSV files method
    # -------------------------------------------------------------------------
    def zip2shp2csv(self, zipFile, tgtPath, csvFile, maxRecords, flushCount, unzPath):
        """Unzip a ZIP archive and convert the first SHP file found in it.

        The archive is expanded into the unzip folder via self.zip2dir;
        the first entry listed in that folder whose name ends with ".shp"
        (compared case-insensitively) is handed to self.shp2csv.  Only
        the top level of the unzip folder is searched.

        zipFile    -- path of the ZIP archive ('~' is expanded)
        tgtPath    -- output folder ('~' is expanded), created when
                      missing; when empty a message is printed and
                      nothing else is done
        csvFile    -- output CSV file; when empty it defaults to the
                      lower-cased SHP file name with '.shp' replaced by
                      '.csv', and a bare file name is placed inside
                      tgtPath
        maxRecords -- passed through unchanged to self.shp2csv
        flushCount -- passed through unchanged to self.shp2csv
        unzPath    -- folder to unzip into ('~' is expanded); when empty
                      a temporary folder is generated and is removed
                      again before this method returns

        Returns None.
        """
        # expand any leading tilde
        # to the user's home path
        zipFile = os.path.expanduser(zipFile)
        tgtPath = os.path.expanduser(tgtPath)
        unzPath = os.path.expanduser(unzPath)

        # verify that ZIP file exists
        if not os.path.exists(zipFile):
            print ("ZIP file '%s' does NOT exist!" % zipFile)
            return

        # without a target folder there is nothing to do
        if tgtPath == "":
            print ("Target folder was NOT specified!")
            return

        print ("Target folder '%s'was specified." % tgtPath)
        # make sure the target folder exists,
        # creating it recursively if it does not
        if not os.path.exists(tgtPath):
            os.makedirs(tgtPath)

        # if UNZIP folder was NOT specified,
        # generate a temporary UNZIP folder
        # to be removed after processing
        rmvFldr = False
        if unzPath == "":
            unzPath = tempfile.mkdtemp()
            rmvFldr = True
            print ("Unzip folder '%s' was generated." % unzPath)
        else:
            print ("Unzip folder '%s' was specified." % unzPath)

        # try/finally guarantees the generated folder is removed on
        # every exit path: normal end, "no SHP found", or an exception
        try:
            if not os.path.exists(unzPath):
                os.makedirs(unzPath)
            # unzip the ZIP file to the UNZIP folder
            self.zip2dir(zipFile, unzPath)
            # find the first SHP file in the UNZIP folder
            tmpFile = ""
            for aFile in os.listdir(unzPath):
                if aFile.lower().endswith(".shp"):
                    tmpFile = os.path.join(unzPath, aFile)
                    break
            # verify that a SHP file was found
            if tmpFile == "":
                print ("Zip file '%s' did NOT contain a SHP file!" % zipFile)
                return
            print ("zipFile = %s" % zipFile)
            print ("shpFile = %s" % tmpFile)
            if csvFile == "":
                csvFile = os.path.join(tgtPath, os.path.basename(tmpFile).lower().replace('.shp','.csv'))
            if os.path.dirname(csvFile) == "":
                csvFile = os.path.join(tgtPath, csvFile)
            print ("csvFile = %s" % csvFile)
            # create the CSV file from the SHP file
            self.shp2csv(tmpFile, tgtPath, csvFile, maxRecords, flushCount)
        finally:
            # remove the UNZIP folder only when this method generated it
            if rmvFldr:
                shutil.rmtree(unzPath)

        return
            
    # -------------------------------------------------------------------------
    # define the ZIP file to a SHP file, then to KML and CSV files method
    # -------------------------------------------------------------------------
    def zip2shp2kmlcsv(self, zipFile, tgtPath, kmlFile, csvFile, maxRecords, flushCount, unzPath):
        """Unzip a ZIP archive and convert the first SHP file to KML and CSV.

        The archive is expanded into the unzip folder via self.zip2dir;
        the first entry listed in that folder whose name ends with ".shp"
        (compared case-insensitively) is handed to self.shp2kmlcsv.  Only
        the top level of the unzip folder is searched.

        zipFile    -- path of the ZIP archive ('~' is expanded)
        tgtPath    -- output folder ('~' is expanded), created when
                      missing; when empty a message is printed and
                      nothing else is done
        kmlFile    -- output KML file; when empty it defaults to the
                      lower-cased SHP file name with '.shp' replaced by
                      '.kml', and a bare file name is placed inside
                      tgtPath
        csvFile    -- output CSV file; same defaulting as kmlFile but
                      with a '.csv' extension
        maxRecords -- passed through unchanged to self.shp2kmlcsv
        flushCount -- passed through unchanged to self.shp2kmlcsv
        unzPath    -- folder to unzip into ('~' is expanded); when empty
                      a temporary folder is generated and is removed
                      again before this method returns

        Returns None.
        """
        # expand any leading tilde
        # to the user's home path
        zipFile = os.path.expanduser(zipFile)
        tgtPath = os.path.expanduser(tgtPath)
        unzPath = os.path.expanduser(unzPath)

        # verify that ZIP file exists
        if not os.path.exists(zipFile):
            print ("ZIP file '%s' does NOT exist!" % zipFile)
            return

        # without a target folder there is nothing to do
        if tgtPath == "":
            print ("Target folder was NOT specified!")
            return

        print ("Target folder '%s'was specified." % tgtPath)
        # make sure the target folder exists,
        # creating it recursively if it does not
        if not os.path.exists(tgtPath):
            os.makedirs(tgtPath)

        # if UNZIP folder was NOT specified,
        # generate a temporary UNZIP folder
        # to be removed after processing
        rmvFldr = False
        if unzPath == "":
            unzPath = tempfile.mkdtemp()
            rmvFldr = True
            print ("Unzip folder '%s' was generated." % unzPath)
        else:
            print ("Unzip folder '%s' was specified." % unzPath)

        # try/finally guarantees the generated folder is removed on
        # every exit path: normal end, "no SHP found", or an exception
        try:
            # create the UNZIP folder only when it is missing; calling
            # os.makedirs on an existing folder raises FileExistsError
            if not os.path.exists(unzPath):
                os.makedirs(unzPath)
            # unzip the ZIP file to the UNZIP folder
            self.zip2dir(zipFile, unzPath)
            # find the first SHP file in the UNZIP folder
            tmpFile = ""
            for aFile in os.listdir(unzPath):
                if aFile.lower().endswith(".shp"):
                    tmpFile = os.path.join(unzPath, aFile)
                    break
            # verify that a SHP file was found
            if tmpFile == "":
                print ("Zip file '%s' did NOT contain a SHP file!" % zipFile)
                return
            print ("zipFile = %s" % zipFile)
            print ("shpFile = %s" % tmpFile)
            shpName = os.path.basename(tmpFile).lower()
            if kmlFile == "":
                kmlFile = os.path.join(tgtPath, shpName.replace('.shp','.kml'))
            if os.path.dirname(kmlFile) == "":
                kmlFile = os.path.join(tgtPath, kmlFile)
            print ("kmlFile = %s" % kmlFile)
            if csvFile == "":
                csvFile = os.path.join(tgtPath, shpName.replace('.shp','.csv'))
            if os.path.dirname(csvFile) == "":
                csvFile = os.path.join(tgtPath, csvFile)
            print ("csvFile = %s" % csvFile)
            # create the KML and CSV files from the SHP file
            self.shp2kmlcsv(tmpFile, tgtPath, kmlFile, csvFile, maxRecords, flushCount)
        finally:
            # remove the UNZIP folder only when this method generated it
            if rmvFldr:
                shutil.rmtree(unzPath)

        return


    # -------------------------------------------------------------------------
    # define the ZIP file to a TAB file, then to a CSV file method
    # -------------------------------------------------------------------------
    def zip2tab2csv(self, zipFile, tgtPath, tabFile, csvFile, maxRecords, flushCount, unzPath, tabDelimiter='\t', csvDelimiter=','):
        """Unzip a ZIP archive and convert every TAB/TXT file found in it.

        The archive is expanded into the unzip folder via self.zip2dir;
        each top-level entry of that folder whose name ends with ".tab"
        or ".txt" (compared case-insensitively) is handed to
        self.tab2csv.

        zipFile      -- path of the ZIP archive ('~' is expanded)
        tgtPath      -- output folder ('~' is expanded), created when
                        missing; when empty a message is printed and
                        nothing else is done
        tabFile      -- not used by this method; kept so that existing
                        callers' positional arguments stay valid
        csvFile      -- output CSV file; when empty, each source file
                        gets its own output named after its lower-cased
                        file name with a ".csv" extension.  When given,
                        it is used for every source file found (a bare
                        file name is placed inside tgtPath).
        maxRecords   -- passed through unchanged to self.tab2csv
        flushCount   -- passed through unchanged to self.tab2csv
        unzPath      -- folder to unzip into ('~' is expanded); when
                        empty a temporary folder is generated and is
                        removed again before this method returns
        tabDelimiter -- passed through unchanged to self.tab2csv
        csvDelimiter -- passed through unchanged to self.tab2csv

        Returns None.
        """
        # expand any leading tilde
        # to the user's home path
        zipFile = os.path.expanduser(zipFile)
        tgtPath = os.path.expanduser(tgtPath)
        unzPath = os.path.expanduser(unzPath)

        # verify that ZIP file exists
        if not os.path.exists(zipFile):
            print ("ZIP file '%s' does NOT exist!" % zipFile)
            return

        # without a target folder there is nothing to do
        if tgtPath == "":
            print ("Target folder was NOT specified!")
            return

        print ("Target folder '%s'was specified." % tgtPath)
        # make sure the target folder exists,
        # creating it recursively if it does not
        if not os.path.exists(tgtPath):
            os.makedirs(tgtPath)

        # if UNZIP folder was NOT specified,
        # generate a temporary UNZIP folder
        # to be removed after processing
        rmvFldr = False
        if unzPath == "":
            unzPath = tempfile.mkdtemp()
            rmvFldr = True
            print ("Unzip folder '%s' was generated." % unzPath)
        else:
            print ("Unzip folder '%s' was specified." % unzPath)

        # try/finally guarantees the generated folder is removed
        # even when the unzip or the conversion raises
        try:
            if not os.path.exists(unzPath):
                os.makedirs(unzPath)
            # unzip the ZIP file to the UNZIP folder
            self.zip2dir(zipFile, unzPath)
            # convert each TAB or TXT file in the UNZIP folder
            foundAny = False
            for aFile in os.listdir(unzPath):
                if not aFile.lower().endswith((".tab", ".txt")):
                    continue
                foundAny = True
                srcFile = os.path.join(unzPath, aFile)
                print ("zipFile = %s" % zipFile)
                print ("tabFile = %s" % srcFile)
                # derive the default name per source file, so several
                # TAB/TXT files in one archive do not all end up in
                # the CSV file named after the first one
                if csvFile == "":
                    outFile = os.path.splitext(aFile.lower())[0] + ".csv"
                else:
                    outFile = csvFile
                if os.path.dirname(outFile) == "":
                    outFile = os.path.join(tgtPath, outFile)
                print ("csvFile = %s" % outFile)
                # create the CSV file from the TAB file
                self.tab2csv(srcFile, tgtPath, outFile, maxRecords, flushCount, tabDelimiter, csvDelimiter)
            # report when no TAB or TXT file was found
            if not foundAny:
                print ("Zip file '%s' did NOT contain a TAB or TXT file!" % zipFile)
        finally:
            # remove the UNZIP folder only when this method generated it
            if rmvFldr:
                shutil.rmtree(unzPath)

        return

            
    # -------------------------------------------------------------------------
    # define the ZIP file to a TXT file and then to a CSV file method
    # -------------------------------------------------------------------------
    def zip2txt2csv(self, zipFile, tgtPath, csvFile, maxRecords, flushCount, unzPath, txtDelimiter=',', csvDelimiter=','):
        """Unzip a ZIP archive and convert the first TXT file found in it.

        The archive is expanded into the unzip folder via self.zip2dir;
        the first entry listed in that folder whose name ends with ".txt"
        (compared case-insensitively) is handed to self.txt2csv.  Only
        the top level of the unzip folder is searched.

        zipFile      -- path of the ZIP archive ('~' is expanded)
        tgtPath      -- output folder ('~' is expanded), created when
                        missing; when empty a message is printed and
                        nothing else is done
        csvFile      -- output CSV file; when empty it defaults to the
                        lower-cased TXT file name with '.txt' replaced
                        by '.csv', and a bare file name is placed inside
                        tgtPath
        maxRecords   -- passed through unchanged to self.txt2csv
        flushCount   -- passed through unchanged to self.txt2csv
        unzPath      -- folder to unzip into ('~' is expanded); when
                        empty a temporary folder is generated and is
                        removed again before this method returns
        txtDelimiter -- passed through unchanged to self.txt2csv
        csvDelimiter -- passed through unchanged to self.txt2csv

        Returns None.
        """
        # expand any leading tilde
        # to the user's home path
        zipFile = os.path.expanduser(zipFile)
        tgtPath = os.path.expanduser(tgtPath)
        unzPath = os.path.expanduser(unzPath)

        # verify that ZIP file exists
        if not os.path.exists(zipFile):
            print ("ZIP file '%s' does NOT exist!" % zipFile)
            return

        # without a target folder there is nothing to do
        if tgtPath == "":
            print ("Target folder was NOT specified!")
            return

        print ("Target folder '%s'was specified." % tgtPath)
        # make sure the target folder exists,
        # creating it recursively if it does not
        if not os.path.exists(tgtPath):
            os.makedirs(tgtPath)

        # if UNZIP folder was NOT specified,
        # generate a temporary UNZIP folder
        # to be removed after processing
        rmvFldr = False
        if unzPath == "":
            unzPath = tempfile.mkdtemp()
            rmvFldr = True
            print ("Unzip folder '%s' was generated." % unzPath)
        else:
            print ("Unzip folder '%s' was specified." % unzPath)

        # try/finally guarantees the generated folder is removed on
        # every exit path: normal end, "no TXT found", or an exception
        try:
            if not os.path.exists(unzPath):
                os.makedirs(unzPath)
            # unzip the ZIP file to the UNZIP folder
            self.zip2dir(zipFile, unzPath)
            # find the first TXT file in the UNZIP folder
            tmpFile = ""
            for aFile in os.listdir(unzPath):
                if aFile.lower().endswith(".txt"):
                    tmpFile = os.path.join(unzPath, aFile)
                    break
            # verify that a TXT file was found
            if tmpFile == "":
                print ("Zip file '%s' did NOT contain a TXT file!" % zipFile)
                return
            print ("zipFile = %s" % zipFile)
            print ("txtFile = %s" % tmpFile)
            if csvFile == "":
                csvFile = os.path.join(tgtPath, os.path.basename(tmpFile).lower().replace('.txt','.csv'))
            if os.path.dirname(csvFile) == "":
                csvFile = os.path.join(tgtPath, csvFile)
            print ("csvFile = %s" % csvFile)
            # create the CSV file from the TXT file
            self.txt2csv(tmpFile, tgtPath, csvFile, maxRecords, flushCount, txtDelimiter, csvDelimiter)
        finally:
            # remove the UNZIP folder only when this method generated it
            if rmvFldr:
                shutil.rmtree(unzPath)

        return


    # -------------------------------------------------------------------------
    # define the ZIP file to a XLS file, then to a CSV file method
    # -------------------------------------------------------------------------
    def zip2xls2csv(self, zipFile, tgtPath, csvFile, maxRecords, flushCount, unzPath):
        """Unzip a ZIP archive and convert every XLS/XLSX file found in it.

        The archive is expanded into the unzip folder via self.zip2dir;
        each top-level entry of that folder whose name ends with ".xls"
        or ".xlsx" (compared case-insensitively) is handed to
        self.xls2csv.

        zipFile    -- path of the ZIP archive ('~' is expanded)
        tgtPath    -- output folder ('~' is expanded), created when
                      missing; when empty a message is printed and
                      nothing else is done
        csvFile    -- output CSV file; when empty, each workbook gets
                      its own output named after its lower-cased file
                      name with a ".csv" extension.  When given, it is
                      used for every workbook found (a bare file name is
                      placed inside tgtPath).
        maxRecords -- passed through unchanged to self.xls2csv
        flushCount -- passed through unchanged to self.xls2csv
        unzPath    -- folder to unzip into ('~' is expanded); when empty
                      a temporary folder is generated and is removed
                      again before this method returns

        Returns None.
        """
        # expand any leading tilde
        # to the user's home path
        zipFile = os.path.expanduser(zipFile)
        tgtPath = os.path.expanduser(tgtPath)
        unzPath = os.path.expanduser(unzPath)

        # verify that ZIP file exists
        if not os.path.exists(zipFile):
            print ("ZIP file '%s' does NOT exist!" % zipFile)
            return

        # without a target folder there is nothing to do
        if tgtPath == "":
            print ("Target folder was NOT specified!")
            return

        print ("Target folder '%s'was specified." % tgtPath)
        # make sure the target folder exists,
        # creating it recursively if it does not
        if not os.path.exists(tgtPath):
            os.makedirs(tgtPath)

        # if UNZIP folder was NOT specified,
        # generate a temporary UNZIP folder
        # to be removed after processing
        rmvFldr = False
        if unzPath == "":
            unzPath = tempfile.mkdtemp()
            rmvFldr = True
            print ("Unzip folder '%s' was generated." % unzPath)
        else:
            print ("Unzip folder '%s' was specified." % unzPath)

        # try/finally guarantees the generated folder is removed
        # even when the unzip or the conversion raises
        try:
            if not os.path.exists(unzPath):
                os.makedirs(unzPath)
            # unzip the ZIP file to the UNZIP folder
            self.zip2dir(zipFile, unzPath)
            # convert each XLS or XLSX file in the UNZIP folder
            foundAny = False
            for aFile in os.listdir(unzPath):
                if not aFile.lower().endswith((".xls", ".xlsx")):
                    continue
                foundAny = True
                xlsFile = os.path.join(unzPath, aFile)
                print ("zipFile = %s" % zipFile)
                print ("xlsFile = %s" % xlsFile)
                # derive the default name per workbook; splitext swaps
                # the real extension, whereas a plain text replace of
                # '.xls' would turn 'x.xlsx' into 'x.csvx'
                if csvFile == "":
                    outFile = os.path.splitext(aFile.lower())[0] + ".csv"
                else:
                    outFile = csvFile
                if os.path.dirname(outFile) == "":
                    outFile = os.path.join(tgtPath, outFile)
                print ("csvFile = %s" % outFile)
                # create the CSV file from the XLS file
                self.xls2csv(xlsFile, tgtPath, outFile, maxRecords, flushCount)
            # report when no XLS or XLSX file was found
            if not foundAny:
                print ("Zip file '%s' did NOT contain a XLS file!" % zipFile)
        finally:
            # remove the UNZIP folder only when this method generated it
            if rmvFldr:
                shutil.rmtree(unzPath)

        return
No results found