@debboutr · Created July 24, 2017
Make the upstream numpy arrays without filtering out flowlines that have no associated catchment
import os, sys
import itertools
import decimal
import struct
import numpy as np
import pandas as pd
import datetime
from collections import defaultdict
from datetime import datetime as dt
from collections import deque, OrderedDict
def dbfreader(f):
    """Yield the field names, the field specs, then each record of an open .dbf file."""
    numrec, lenheader = struct.unpack('<xxxxLH22x', f.read(32))
    numfields = (lenheader - 33) // 32

    fields = []
    for fieldno in xrange(numfields):
        name, typ, size, deci = struct.unpack('<11sc4xBB14x', f.read(32))
        name = name.replace('\0', '')  # eliminate NULs from string
        fields.append((name, typ, size, deci))
    yield [field[0] for field in fields]
    yield [tuple(field[1:]) for field in fields]

    terminator = f.read(1)
    assert terminator == '\r'

    fields.insert(0, ('DeletionFlag', 'C', 1, 0))
    fmt = ''.join(['%ds' % fieldinfo[2] for fieldinfo in fields])
    fmtsiz = struct.calcsize(fmt)
    for i in xrange(numrec):
        record = struct.unpack(fmt, f.read(fmtsiz))
        if record[0] != ' ':
            continue  # deleted record
        result = []
        for (name, typ, size, deci), value in itertools.izip(fields, record):
            if name == 'DeletionFlag':
                continue
            if typ == "N":
                value = value.replace('\0', '').lstrip()
                if value == '':
                    value = 0
                elif deci:
                    value = decimal.Decimal(value)
                else:
                    value = int(value)
            elif typ == 'C':
                value = value.rstrip()
            elif typ == 'D':
                try:
                    y, m, d = int(value[:4]), int(value[4:6]), int(value[6:8])
                    value = datetime.date(y, m, d)
                except ValueError:
                    value = None
            elif typ == 'L':
                value = (value in 'YyTt' and 'T') or (value in 'NnFf' and 'F') or '?'
            elif typ == 'F':
                value = float(value)
            result.append(value)
        yield result
def dbf2DF(f, upper=True):
    """Read a .dbf file into a pandas DataFrame, upper-casing column names by default."""
    data = list(dbfreader(open(f, 'rb')))
    if not upper:
        return pd.DataFrame(data[2:], columns=data[0])
    else:
        return pd.DataFrame(data[2:], columns=map(str.upper, data[0]))
def bastards(token, tree, chkset=None):
    """Return every COMID upstream of `token` in `tree`, excluding `token` itself."""
    visited = set()
    to_crawl = deque([token])
    while to_crawl:
        current = to_crawl.popleft()
        if current in visited:
            continue
        visited.add(current)
        node_children = set(tree[current])
        to_crawl.extendleft(node_children - visited)
    visited.remove(token)
    if chkset is not None:
        visited = visited.intersection(chkset)
    return list(visited)
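# A minimal sketch (hypothetical toy COMIDs, not NHDPlus data) of what `bastards`
# returns: every COMID upstream of the one passed in, excluding the starting
# COMID itself. Flows here are 1 -> 2 -> 4 and 3 -> 4.
_toy_ups = defaultdict(list, {4: [2, 3], 2: [1]})
assert sorted(bastards(4, _toy_ups)) == [1, 2, 3]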
def UpcomDict(nhd, interVPUtbl, zone):
    """Return the UpCOMs dictionary (TOCOMID -> list of FROMCOMIDs) for the accumulation process."""
    # Provide either path to from-to tables or completed from-to table
    flow = dbf2DF("%s/NHDPlusAttributes/PlusFlow.dbf" % (nhd))[['TOCOMID',
                                                                'FROMCOMID']]
    flow = flow[flow.TOCOMID != 0]
    # check to see if out-of-zone values have FTYPE == 'Coastline'
    fls = dbf2DF("%s/NHDSnapshot/Hydrography/NHDFlowline.dbf" % (nhd))
    coastfl = fls.COMID[fls.FTYPE == 'Coastline']
    flow = flow[~flow.FROMCOMID.isin(coastfl.values)]
    # remove these FROMCOMIDs from the 'flow' table; there are three COMIDs here
    # that won't get filtered out
    remove = interVPUtbl.removeCOMs.values[interVPUtbl.removeCOMs.values != 0]
    flow = flow[~flow.FROMCOMID.isin(remove)]
    # find values that are coming from other zones and remove the ones that
    # aren't in the interVPU table
    out = np.setdiff1d(flow.FROMCOMID.values, fls.COMID.values)
    out = out[np.nonzero(out)]  # keep the out-of-zone COMID values themselves, dropping 0
    flow = flow[~flow.FROMCOMID.isin(
                np.setdiff1d(out, interVPUtbl.thruCOMIDs.values))]
    # Now table is ready for processing and the UpCOMs dict can be created
    fcom, tcom = flow.FROMCOMID.values, flow.TOCOMID.values
    UpCOMs = defaultdict(list)
    for i in range(len(flow)):
        FROMCOMID = fcom[i]
        if FROMCOMID == 0:
            UpCOMs[tcom[i]] = []
        else:
            UpCOMs[tcom[i]].append(FROMCOMID)
    # add IDs from UpCOMadd column if working in ToZone
    for interLine in interVPUtbl.values:
        if interLine[6] > 0 and interLine[2] == zone:
            UpCOMs[int(interLine[6])].append(int(interLine[0]))
    return UpCOMs
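# A small illustration (toy from/to pairs, not real NHDPlus COMIDs) of the
# structure UpcomDict builds: each TOCOMID keys the list of COMIDs that flow
# into it, mirroring the fcom/tcom loop above.
_toy_pairs = [(10, 20), (11, 20), (20, 30), (0, 40)]   # (FROMCOMID, TOCOMID)
_toy_upcoms = defaultdict(list)
for _frm, _to in _toy_pairs:
    if _frm == 0:
        _toy_upcoms[_to] = []            # headwater: nothing upstream
    else:
        _toy_upcoms[_to].append(_frm)
# _toy_upcoms -> {20: [10, 11], 30: [20], 40: []}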
def makeNumpyVectors2(d, interVPUtbl, inputs, NHD_dir):
    """Build and save the compressed upstream ('bastard') arrays for every zone."""
    os.mkdir(d + '/bastards')
    for zone in inputs:
        if not os.path.exists('%s/bastards/accum_%s.npz' % (d, zone)):
            print zone
            hydroregion = inputs[zone]
            print 'Making UpStreamComs dictionary...'
            start_time = dt.now()
            NHD_pre = '%s/NHDPlus%s/NHDPlus%s' % (NHD_dir, hydroregion, zone)
            UpStreamComs = UpcomDict(NHD_pre, interVPUtbl, zone)
            print("--- %s elapsed ---" % (dt.now() - start_time))
            print '....'
            print 'Making numpy bastard vectors...'
            start_time = dt.now()
            tbl_dir = NHD_dir + "/NHDPlus%s/NHDPlus%s/NHDSnapshot/Hydrography/NHDFlowline.dbf" % (hydroregion, zone)
            catch = dbf2DF(tbl_dir).set_index('COMID')
            # keep every flowline COMID, plus inter-VPU COMIDs flowing into this zone
            COMIDs = np.append(np.array(catch.index),
                               np.array(interVPUtbl.loc[np.logical_and(
                                   np.array(interVPUtbl.ToZone) == zone,
                                   np.array(interVPUtbl.DropCOMID) == 0)].thruCOMIDs))
            del catch
            if 0 in COMIDs:
                COMIDs = np.delete(COMIDs, np.where(COMIDs == 0))
            a = map(lambda x: bastards(x, UpStreamComs), COMIDs)
            lengths = np.array([len(v) for v in a])
            a = np.int32(np.hstack(np.array(a)))  # convert to a 1d vector
            np.savez_compressed('%s/bastards/accum_%s.npz' % (d, zone),
                                comids=COMIDs, lengths=lengths, upstream=a)
            print("--- %s elapsed ---" % (dt.now() - start_time))
            print '___________________'
inputs = OrderedDict([('06', 'MS'),
('05', 'MS'),
('10U', 'MS'),
('10L', 'MS'),
('07', 'MS'),
('11', 'MS'),
('14', 'CO'),
('01', 'NE'),
('17', 'PN'),
('16', 'GB'),
('15', 'CO'),
('13', 'RG'),
('12', 'TX'),
('09', 'SR'),
('02', 'MA'),
('08', 'MS'),
('04', 'GL'),
('03W', 'SA'),
('03S', 'SA'),
('03N', 'SA'),
('18', 'CA')])
sys.path.append('D:\\Projects\\StreamCat')
interVPUtbl = pd.read_csv('D:/Projects/StreamCat/InterVPU.csv')
here = 'D:/Projects/CDI/Numpy_ALL2'
nhd = 'D:/NHDPlusV21'
makeNumpyVectors2(here, interVPUtbl, inputs, nhd)
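# A hedged sketch of reading one of the saved arrays back; it assumes the run
# above has finished and that zone '06' produced '<here>/bastards/accum_06.npz'.
# The three arrays are the ones written by np.savez_compressed above: the
# `comids`, their upstream `lengths`, and the flat `upstream` vector, which can
# be sliced per COMID with a cumulative sum of the lengths.
accum = np.load('%s/bastards/accum_06.npz' % here)
comids = accum['comids']
lengths = accum['lengths']
upstream = accum['upstream']
ends = np.cumsum(lengths)
starts = ends - lengths
first_upstream_set = upstream[starts[0]:ends[0]]   # upstream COMIDs of comids[0]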