Created
July 24, 2017 23:01
-
-
Save debboutr/27c8f92c47c764e0e37e2c483db239eb to your computer and use it in GitHub Desktop.
Make the upstream numpy arrays without filtering out flowlines that have no associated catchment.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os, sys | |
import itertools | |
import decimal | |
import struct | |
import numpy as np | |
import pandas as pd | |
import datetime | |
from collections import defaultdict | |
from datetime import datetime as dt | |
from collections import deque, OrderedDict | |
def dbfreader(f):
    """Yield the contents of a dBASE (.dbf) file opened in binary mode.

    Yields, in order:
      1. a list of field names (str),
      2. a list of (type, size, decimal_places) field specs,
      3. one list of converted values per non-deleted record.

    Raises ValueError if the header terminator byte is missing.
    """
    # Header: version byte + date (4 bytes, skipped), record count (uint32),
    # header length (uint16), 22 reserved bytes.
    numrec, lenheader = struct.unpack('<xxxxLH22x', f.read(32))
    # Header = 32 bytes + one 32-byte descriptor per field + 1 terminator byte.
    numfields = (lenheader - 33) // 32
    fields = []
    for _ in range(numfields):
        name, typ, size, deci = struct.unpack('<11sc4xBB14x', f.read(32))
        # Field names are NUL-padded; strip the NULs and decode for use as labels.
        name = name.replace(b'\0', b'').decode('ascii')
        fields.append((name, typ.decode('ascii'), size, deci))
    yield [field[0] for field in fields]
    yield [tuple(field[1:]) for field in fields]

    terminator = f.read(1)
    if terminator != b'\r':
        # Was a bare assert; raise so the check survives python -O.
        raise ValueError('corrupt dbf header: missing field terminator')

    # Prepend the 1-byte deletion flag so the struct format spans a full row.
    fields.insert(0, ('DeletionFlag', 'C', 1, 0))
    fmt = ''.join('%ds' % fieldinfo[2] for fieldinfo in fields)
    fmtsiz = struct.calcsize(fmt)
    for _ in range(numrec):
        record = struct.unpack(fmt, f.read(fmtsiz))
        if record[0] != b' ':
            continue  # deleted record
        result = []
        for (name, typ, size, deci), value in zip(fields, record):
            if name == 'DeletionFlag':
                continue
            if typ == 'N':
                value = value.replace(b'\0', b'').lstrip()
                if value == b'':
                    value = 0
                elif deci:
                    value = decimal.Decimal(value.decode('ascii'))
                else:
                    value = int(value)
            elif typ == 'C':
                # latin-1 maps all 256 byte values, so decoding cannot fail.
                value = value.rstrip().decode('latin-1')
            elif typ == 'D':
                try:
                    y, m, d = int(value[:4]), int(value[4:6]), int(value[6:8])
                    value = datetime.date(y, m, d)
                except ValueError:  # was a bare except; blank/garbage date
                    value = None
            elif typ == 'L':
                value = (value in b'YyTt' and 'T') or (value in b'NnFf' and 'F') or '?'
            elif typ == 'F':
                value = float(value)
            result.append(value)
        yield result
def dbf2DF(f, upper=True):
    """Read the dbf file at path *f* into a pandas DataFrame.

    upper: when True (default), column names are upper-cased.
    """
    # Use a context manager so the file handle is closed (the original
    # passed open(f, 'rb') inline and leaked it).
    with open(f, 'rb') as fobj:
        data = list(dbfreader(fobj))
    columns = data[0]
    if upper:
        columns = [str(c).upper() for c in columns]
    # data[1] holds the (type, size, deci) field specs and is not needed.
    return pd.DataFrame(data[2:], columns=columns)
def bastards(token, tree, chkset=None):
    """Return every node reachable upstream of *token* (token excluded).

    token: starting COMID.
    tree: mapping COMID -> list of immediately-upstream COMIDs; expected to
          be a defaultdict(list) so missing keys yield no children.
    chkset: optional set/iterable; when given, the result is restricted to it.

    Returns a list (unordered) of upstream COMIDs.
    """
    visited = set()
    to_crawl = deque([token])
    while to_crawl:
        current = to_crawl.popleft()
        if current in visited:
            continue
        visited.add(current)
        # extendleft makes this depth-first; order doesn't matter since only
        # the final set is returned. The visited check also guards cycles.
        to_crawl.extendleft(set(tree[current]) - visited)
    visited.remove(token)  # the start node is not its own ancestor
    if chkset is not None:  # fix: identity test, not `!= None`
        visited = visited.intersection(chkset)
    return list(visited)
def UpcomDict(nhd, interVPUtbl, zone):
    """Build the upstream-connections dictionary for one NHDPlus zone.

    nhd: path to the NHDPlus<region><zone> directory.
    interVPUtbl: inter-VPU DataFrame (removeCOMs, thruCOMIDs, ToZone, ...).
    zone: hydro-region zone string, e.g. '06'.

    Returns a defaultdict mapping TOCOMID -> list of FROMCOMIDs.
    """
    flow = dbf2DF("%s/NHDPlusAttributes/PlusFlow.dbf" % (nhd))[['TOCOMID',
                                                                'FROMCOMID']]
    flow = flow[flow.TOCOMID != 0]
    # Drop connections whose upstream flowline is a coastline feature.
    fls = dbf2DF("%s/NHDSnapshot/Hydrography/NHDFlowline.dbf" % (nhd))
    coastfl = fls.COMID[fls.FTYPE == 'Coastline']
    flow = flow[~flow.FROMCOMID.isin(coastfl.values)]
    # Remove the handful of FROMCOMIDs explicitly flagged in the inter-VPU
    # table; these would not get filtered out otherwise.
    remove = interVPUtbl.removeCOMs.values[interVPUtbl.removeCOMs.values != 0]
    flow = flow[~flow.FROMCOMID.isin(remove)]
    # FROMCOMIDs absent from this zone's flowlines come from other zones:
    # keep only the ones routed through per the inter-VPU table.
    # BUG FIX: the original passed np.nonzero(...) -- a tuple of *index
    # positions* -- into the isin comparison, so row indices were compared
    # against COMID values. Use the nonzero setdiff values themselves.
    out = np.setdiff1d(flow.FROMCOMID.values, fls.COMID.values)
    out = out[out != 0]  # 0 is the no-upstream sentinel, handled below
    flow = flow[~flow.FROMCOMID.isin(
                np.setdiff1d(out, interVPUtbl.thruCOMIDs.values))]
    # Table is clean: build TOCOMID -> [FROMCOMID, ...].
    UpCOMs = defaultdict(list)
    for FROMCOMID, TOCOMID in zip(flow.FROMCOMID.values, flow.TOCOMID.values):
        if FROMCOMID == 0:
            UpCOMs[TOCOMID] = []  # headwater: explicit empty upstream list
        else:
            UpCOMs[TOCOMID].append(FROMCOMID)
    # Splice in connections crossing into this zone from another VPU.
    # NOTE(review): columns are addressed positionally -- presumably col 6 is
    # the receiving COMID (UpCOMadd), col 2 ToZone, col 0 the contributing
    # COMID; verify against InterVPU.csv before changing.
    for interLine in interVPUtbl.values:
        if interLine[6] > 0 and interLine[2] == zone:
            UpCOMs[int(interLine[6])].append(int(interLine[0]))
    return UpCOMs
def makeNumpyVectors2(d, interVPUtbl, inputs, NHD_dir):
    """Write per-zone compressed .npz upstream-accumulation arrays.

    d: output directory; a 'bastards' subdirectory is created inside it.
    interVPUtbl: inter-VPU DataFrame.
    inputs: OrderedDict mapping zone -> hydro-region.
    NHD_dir: root of the NHDPlusV21 data tree.

    For each zone, saves comids / lengths / upstream arrays, where `upstream`
    is the flattened concatenation of every COMID's upstream set and
    `lengths` gives the slice size for each COMID.
    """
    # os.mkdir raised on reruns; make the whole function re-entrant.
    if not os.path.exists(d + '/bastards'):
        os.mkdir(d + '/bastards')
    for zone in inputs:
        out_npz = '%s/bastards/accum_%s.npz' % (d, zone)
        if os.path.exists(out_npz):
            continue  # zone already processed
        print(zone)  # py3 print calls (original used py2 print statements)
        hydroregion = inputs[zone]
        print('Making UpStreamComs dictionary...')
        start_time = dt.now()
        NHD_pre = '%s/NHDPlus%s/NHDPlus%s' % (NHD_dir, hydroregion, zone)
        UpStreamComs = UpcomDict(NHD_pre, interVPUtbl, zone)
        print("--- %s seconds ---" % (dt.now() - start_time))
        print('....')
        print('Making numpy bastard vectors...')
        start_time = dt.now()
        tbl_dir = NHD_dir + "/NHDPlus%s/NHDPlus%s/NHDSnapshot/Hydrography/NHDFlowline.dbf" % (hydroregion, zone)
        catch = dbf2DF(tbl_dir).set_index('COMID')
        # All COMIDs in the zone, plus through-COMIDs routed into this zone
        # that are not flagged for dropping.
        keep = np.logical_and(np.array(interVPUtbl.ToZone) == zone,
                              np.array(interVPUtbl.DropCOMID) == 0)
        COMIDs = np.append(np.array(catch.index),
                           np.array(interVPUtbl.loc[keep].thruCOMIDs))  # .loc: .ix is removed from pandas
        del catch  # free the table before the traversal
        if 0 in COMIDs:
            COMIDs = np.delete(COMIDs, np.where(COMIDs == 0))
        # List comprehension, not map(): the result is iterated twice below,
        # and a py3 map object would be exhausted after the first pass.
        ups = [bastards(c, UpStreamComs) for c in COMIDs]
        lengths = np.array([len(u) for u in ups])
        upstream = np.int32(np.hstack(ups))  # flatten to a 1-d vector
        np.savez_compressed(out_npz, comids=COMIDs, lengths=lengths,
                            upstream=upstream)
        print("--- %s seconds ---" % (dt.now() - start_time))
        print('___________________')
# Zone -> hydro-region lookup; OrderedDict preserves this processing order.
inputs = OrderedDict([
    ('06', 'MS'), ('05', 'MS'), ('10U', 'MS'), ('10L', 'MS'),
    ('07', 'MS'), ('11', 'MS'), ('14', 'CO'), ('01', 'NE'),
    ('17', 'PN'), ('16', 'GB'), ('15', 'CO'), ('13', 'RG'),
    ('12', 'TX'), ('09', 'SR'), ('02', 'MA'), ('08', 'MS'),
    ('04', 'GL'), ('03W', 'SA'), ('03S', 'SA'), ('03N', 'SA'),
    ('18', 'CA'),
])
# Local project paths (Windows box); the inter-VPU table drives cross-zone routing.
sys.path.append('D:\\Projects\\StreamCat')
interVPUtbl = pd.read_csv('D:/Projects/StreamCat/InterVPU.csv')
here = 'D:/Projects/CDI/Numpy_ALL2'
nhd = 'D:/NHDPlusV21'
makeNumpyVectors2(here, interVPUtbl, inputs, nhd)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment