Skip to content

Instantly share code, notes, and snippets.

@hampelm
Created May 3, 2018 03:13
Show Gist options
  • Save hampelm/541da7f1c3fbc455a112fc9cb22c7118 to your computer and use it in GitHub Desktop.
Save hampelm/541da7f1c3fbc455a112fc9cb22c7118 to your computer and use it in GitHub Desktop.
import csv
import re
# Phrases identifying lines the parser ignores. They look like the report's
# repeated page-header / column-title text rather than case data.
skiplines = [
    'RUN DATE',
    '36TH DISTRICT COURT',
    '421 MADISON AVENUE',
    'DETROIT MI 48226',
    'MATTHEW REPORT',
    'Case Type',
    'I01',
    'NAME CASE NUMBER',
]


def skippable(line):
    """Return True if *line* contains any phrase listed in ``skiplines``.

    The check is a plain, case-sensitive substring match anywhere in the
    line, so a phrase does not have to start the line to trigger a skip.
    """
    return any(phrase in line for phrase in skiplines)
def parse_address(line):
    """Extract the address fields from a fixed-width line.

    Fields are sliced from fixed character positions (0-based) and stripped
    of surrounding whitespace:

    * ``address`` -- characters 5 through 37
    * ``city``    -- characters 38 through 55
    * ``state``   -- characters 56 through 57
    * ``zip``     -- characters 59 through 63

    Characters 0-4 and character 58 are not read here. A line shorter than
    the layout simply yields empty strings for the fields past its end.

    Returns a dict with the keys ``address``, ``city``, ``state`` and
    ``zip``.
    """
    return {
        'address': line[5:38].strip(),
        'city': line[38:56].strip(),
        'state': line[56:58].strip(),
        'zip': line[59:64].strip(),
    }
def parse_name(line):
    """Extract the name/case fields from a fixed-width line.

    Fields are sliced from fixed character positions (0-based) and stripped
    of surrounding whitespace:

    * ``name``       -- characters 0 through 16
    * ``case``       -- characters 17 through 28
    * ``file_date``  -- characters 35 through 44
    * ``close_date`` -- character 46 to the end of the line

    Characters 29-34 and character 45 are not read here. A line shorter than
    the layout yields empty strings for the fields past its end.

    Returns a dict with the keys ``name``, ``case``, ``file_date`` and
    ``close_date``.
    """
    return {
        'name': line[0:17].strip(),
        'case': line[17:29].strip(),
        'file_date': line[35:45].strip(),
        'close_date': line[46:].strip(),
    }
def merge_details(info, case, header):
    """Copy every entry of *info* into *case* under a prefixed key.

    Each key ``k`` in *info* is stored in *case* as ``header + '_' + k``,
    overwriting any existing entry with that name.

    *case* is modified in place and is also returned, so callers may use
    either the return value or the dict they passed in.
    """
    for key, value in info.items():
        case[header + '_' + key] = value
    return case
# Driver: read the fixed-width report, group its lines into one dict per
# case, then write every case as a row of a CSV file.

keys = set()             # union of the column names seen across all cases
cases = []               # finished case dicts, in the order they were read
case = None              # case being assembled; None until the first P01 line
next_line_type = None    # truthy when the next data line is a name line
next_line_header = None  # key prefix (e.g. 'P01') to use for that name line

with open('010109-123111.txt') as infile:
    for line in infile:
        if skippable(line):
            continue
        line = line.strip()

        # P01 means the start of a new case
        if 'P01' in line:
            # Store the case we were building before starting the next one.
            if case is not None:
                cases.append(case)
                keys.update(case)
            case = {}
            details = parse_address(line)
            case = merge_details(details, case, 'P01')
            next_line_type = 'details'
            next_line_header = 'P01'
            continue

        # Otherwise we add the details to the existing case.
        # The line might be a second plaintiff or the start of the defendant
        if re.search(r'^[A-Z]\d\d', line):
            header = line[0:3]
            next_line_type = 'details'
            next_line_header = header
            details = parse_address(line)
            case = merge_details(details, case, header)
            continue

        # The line after a plaintiff or defendant is their name and info
        if next_line_type:
            details = parse_name(line)
            case = merge_details(details, case, next_line_header)
            next_line_header = None
            next_line_type = None

# Inside the loop a case is only stored when the *next* P01 line shows up,
# so the last case in the file must be stored explicitly here; otherwise it
# (and any column names unique to it) would be missing from the output.
if case is not None:
    cases.append(case)
    keys.update(case)

keys = sorted(keys)
print(keys)

# newline='' lets the csv module control line endings itself, as its
# documentation requires; the with-block closes the file even on error.
with open('010109-123111.results.csv', 'w', newline='') as f:
    w = csv.DictWriter(f, keys)
    w.writeheader()
    w.writerows(cases)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment