Created
May 3, 2018 03:13
-
-
Save hampelm/541da7f1c3fbc455a112fc9cb22c7118 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import re | |
# Substrings that mark a line as non-data. Any input line containing one of
# these is dropped by skippable() before parsing.
# NOTE(review): these look like the report's page-header boilerplate -- confirm
# against a sample of the input file.
skiplines = [
    'RUN DATE',
    '36TH DISTRICT COURT',
    '421 MADISON AVENUE',
    'DETROIT MI 48226',
    'MATTHEW REPORT',
    'Case Type',
    'I01',
    'NAME CASE NUMBER',
]
def skippable(line):
    """Return True if *line* contains any phrase listed in ``skiplines``.

    The match is a plain, case-sensitive substring test, so a phrase
    anywhere in the line (not just at the start) causes it to be skipped.
    """
    return any(phrase in line for phrase in skiplines)
def parse_address(line):
    """Slice the address fields out of a fixed-width party line.

    Column layout (0-based, end-exclusive), each value whitespace-stripped:

    * ``address`` -- columns 5-38
    * ``city``    -- columns 38-56
    * ``state``   -- columns 56-58
    * ``zip``     -- columns 59-64 (column 58 is not read)

    A line shorter than a field's columns yields an empty string for that
    field, because out-of-range slices are empty rather than an error.

    Returns a dict with the keys ``address``, ``city``, ``state``, ``zip``.
    """
    return {
        'address': line[5:38].strip(),
        'city': line[38:56].strip(),
        'state': line[56:58].strip(),
        'zip': line[59:64].strip(),
    }
def parse_name(line):
    """Slice the name and case fields out of a fixed-width name line.

    Column layout (0-based, end-exclusive), each value whitespace-stripped:

    * ``name``       -- columns 0-17
    * ``case``       -- columns 17-29
    * ``file_date``  -- columns 35-45
    * ``close_date`` -- column 46 to the end of the line

    Columns 29-35 and column 45 are not read. A short line yields empty
    strings for the fields it does not reach.

    Returns a dict with the keys ``name``, ``case``, ``file_date`` and
    ``close_date``.
    """
    return {
        'name': line[0:17].strip(),
        'case': line[17:29].strip(),
        'file_date': line[35:45].strip(),
        'close_date': line[46:].strip(),
    }
def merge_details(info, case, header):
    """Copy every entry of *info* into *case* under a prefixed key.

    Each key ``k`` in *info* is stored as ``header + '_' + k``; an existing
    entry with the same prefixed key is overwritten.

    *case* is modified in place and the same dict object is returned.
    """
    for key, value in info.items():
        case[header + '_' + key] = value
    return case
# Read the fixed-width report, build one dict per case, then write all cases
# to a CSV whose header is the sorted union of every key seen.
keys = set()
cases = []
case = None
# Set after a party (header) line: the line that follows is that party's
# name/case line and is stored under next_line_header.
next_line_type = None
next_line_header = None

with open('010109-123111.txt') as infile:
    for line in infile:
        # The skip check runs on the raw line, before stripping.
        if skippable(line):
            continue

        # Column offsets used by the parsers are relative to the stripped line.
        line = line.strip()

        # P01 means the start of a new case
        if 'P01' in line:
            if case is not None:
                cases.append(case)
                keys.update(case)
            case = {}
            details = parse_address(line)
            case = merge_details(details, case, 'P01')
            next_line_type = 'details'
            next_line_header = 'P01'
            continue

        # Otherwise we add the details to the existing case.
        # The line might be a second plaintiff or the start of the defendant
        if re.search(r'^[A-Z]\d\d', line):
            header = line[0:3]
            next_line_type = 'details'
            next_line_header = header
            details = parse_address(line)
            case = merge_details(details, case, header)
            continue

        # The line after a plaintiff or defendant is their name and info
        if next_line_type:
            details = parse_name(line)
            case = merge_details(details, case, next_line_header)
            next_line_header = None
            next_line_type = None

# A case is only flushed when the next P01 line shows up, so the last case
# in the file has to be flushed here or it would be missing from the output.
if case is not None:
    cases.append(case)
    keys.update(case)

keys = sorted(keys)
print(keys)

# newline='' lets the csv module control line endings itself (avoids blank
# rows on platforms that translate '\n'); the with-block closes the file even
# if writing fails part-way.
with open('010109-123111.results.csv', 'w', newline='') as f:
    w = csv.DictWriter(f, keys)
    w.writeheader()
    w.writerows(cases)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment