hampelm · May 3, 2018 03:13
diff --git a/foreclosures.py b/foreclosures.py
 import csv
 import re

 # Substrings that mark input lines the parser ignores: skippable() returns
 # True for any line containing one of these phrases.
 skiplines = ['RUN DATE', '36TH DISTRICT COURT', '421 MADISON AVENUE',
             'DETROIT              MI 48226', 'MATTHEW REPORT', 'Case Type', 'I01', 'NAME             CASE NUMBER']


 def skippable(line):
    """Report whether ``line`` should be ignored by the parser.

    A line is skippable when at least one entry of the module-level
    ``skiplines`` list occurs somewhere inside it (plain substring match).
    """
    return any(marker in line for marker in skiplines)


 def parse_address(line):
    """Cut the address fields out of a fixed-width party line.

    Character positions read from ``line``: 5-37 for ``address``, 38-55 for
    ``city``, 56-57 for ``state`` and 59-63 for ``zip``. Every field has its
    surrounding whitespace stripped; a line that is too short simply yields
    empty strings for the missing columns.

    Returns a dict with the keys ``address``, ``city``, ``state``, ``zip``.
    """
    street, town = line[5:38], line[38:56]
    region, postal = line[56:58], line[59:64]
    return {
        'address': street.strip(),
        'city': town.strip(),
        'state': region.strip(),
        'zip': postal.strip(),
    }


 def parse_name(line):
    """Cut the name and case fields out of a fixed-width name line.

    Returns a dict with ``name`` (characters 0-16), ``case`` (17-28),
    ``file_date`` (35-44) and ``close_date`` (46 to the end of the line),
    each with surrounding whitespace removed.
    """
    layout = (
        ('name', slice(0, 17)),
        ('case', slice(17, 29)),
        ('file_date', slice(35, 45)),
        ('close_date', slice(46, None)),
    )
    return {field: line[span].strip() for field, span in layout}


 def merge_details(info, case, header):
    """Copy every entry of ``info`` into ``case`` under a prefixed key.

    Each key ``k`` of ``info`` is stored in ``case`` as ``header + '_' + k``.
    ``case`` is modified in place and the same object is returned.
    """
    case.update(
        (header + '_' + field, value) for field, value in info.items()
    )
    return case


 # Parser state. ``case`` is the record currently being assembled (None until
 # the first P01 line is seen); ``next_line_header`` holds the party code whose
 # name line is expected next; ``keys`` collects every column name seen so the
 # CSV header can cover all cases.
 keys = set()
 cases = []
 case = None
 next_line_type = None
 next_line_header = None
 with open('010109-123111.txt') as infile:
    for line in infile:
        if skippable(line):
            continue

        line = line.strip()

        # P01 means the start of a new case
        if 'P01' in line:
            if case is not None:
                cases.append(case)
                keys.update(case)
            case = {}
            details = parse_address(line)
            case = merge_details(details, case, 'P01')
            next_line_type = 'details'
            next_line_header = 'P01'
            continue

        # Otherwise we add the details to the existing case.
        # The line might be a second plaintiff or the start of the defendant
        if re.search(r'^[A-Z]\d\d', line):
            header = line[0:3]
            next_line_type = 'details'
            next_line_header = header
            details = parse_address(line)
            case = merge_details(details, case, header)
            continue

        # The line after a plaintiff or defendant is their name and info
        if next_line_type:
            details = parse_name(line)
            case = merge_details(details, case, next_line_header)
            next_line_header = None
            next_line_type = None

 # Inside the loop a case is only stored once the *next* P01 line arrives, so
 # the final case of the file must be stored here or it would be dropped.
 if case is not None:
    cases.append(case)
    keys.update(case)

 keys = sorted(keys)
 print(keys)

 # newline='' leaves line-ending handling to the csv module; the with-block
 # closes the file even if writing fails.
 with open('010109-123111.results.csv', 'w', newline='') as f:
    w = csv.DictWriter(f, keys)
    w.writeheader()
    w.writerows(cases)
	import csv
	import re

	# Substrings that mark input lines the parser ignores. Matching is by exact
	# substring, so the runs of spaces inside the fixed-width literals matter.
	skiplines = ['RUN DATE', '36TH DISTRICT COURT', '421 MADISON AVENUE',
	             'DETROIT              MI 48226', 'MATTHEW REPORT', 'Case Type', 'I01', 'NAME             CASE NUMBER']


	def skippable(line):
	    """Return True if ``line`` contains any phrase from ``skiplines``.

	    The check is a plain substring test against each entry of the
	    module-level ``skiplines`` list; False is returned when none match.
	    """
	    for phrase in skiplines:
	        if phrase in line:
	            return True

	    return False


	def parse_address(line):
	    """Extract the fixed-width address fields from a party line.

	    Reads ``address`` from characters 5-37, ``city`` from 38-55, ``state``
	    from 56-57 and ``zip`` from 59-63 of ``line``, stripping surrounding
	    whitespace from each. Columns beyond the end of a short line come back
	    as empty strings.

	    Returns a dict with the keys ``address``, ``city``, ``state``, ``zip``.
	    """
	    return dict(
	        address=line[5:38].strip(),
	        city=line[38:56].strip(),
	        state=line[56:58].strip(),
	        zip=line[59:64].strip()
	    )


	def parse_name(line):
	    """Extract the fixed-width name and case fields from a name line.

	    Reads ``name`` from characters 0-16, ``case`` from 17-28, ``file_date``
	    from 35-44 and ``close_date`` from 46 to the end of ``line``, stripping
	    surrounding whitespace from each.

	    Returns a dict with the keys ``name``, ``case``, ``file_date`` and
	    ``close_date``.
	    """
	    return dict(
	        name=line[0:17].strip(),
	        case=line[17:29].strip(),
	        file_date=line[35:45].strip(),
	        close_date=line[46:].strip()
	    )


	def merge_details(info, case, header):
	    """Store each entry of ``info`` in ``case`` under ``header + '_' + key``.

	    ``case`` is updated in place and also returned, so callers can rebind
	    the result.
	    """
	    for key in info:
	        case[header + '_' + key] = info[key]

	    return case


	# Parser state. ``case`` is the record currently being assembled (None until
	# the first P01 line is seen); ``next_line_header`` holds the party code
	# whose name line is expected next; ``keys`` collects every column name seen
	# so the CSV header can cover all cases.
	keys = set()
	cases = []
	case = None
	next_line_type = None
	next_line_header = None
	with open('010109-123111.txt') as infile:
	    for line in infile:
	        if skippable(line):
	            continue

	        line = line.strip()

	        # P01 means the start of a new case
	        if 'P01' in line:
	            if case is not None:
	                cases.append(case)
	                keys |= set(case)
	            case = {}
	            details = parse_address(line)
	            case = merge_details(details, case, 'P01')
	            next_line_type = 'details'
	            next_line_header = 'P01'
	            continue

	        # Otherwise we add the details to the existing case.
	        # The line might be a second plaintiff or the start of the defendant
	        if re.search(r'^[A-Z]\d\d', line):
	            header = line[0:3]
	            next_line_type = 'details'
	            next_line_header = header
	            details = parse_address(line)
	            case = merge_details(details, case, header)
	            continue

	        # The line after a plaintiff or defendant is their name and info
	        if next_line_type:
	            details = parse_name(line)
	            case = merge_details(details, case, next_line_header)
	            next_line_header = None
	            next_line_type = None

	# Inside the loop a case is only stored once the *next* P01 line arrives, so
	# the final case of the file must be stored here or it would be dropped.
	if case is not None:
	    cases.append(case)
	    keys |= set(case)

	keys = sorted(keys)
	print(keys)

	# newline='' leaves line-ending handling to the csv module; the with-block
	# closes the file even if writing fails.
	with open('010109-123111.results.csv', 'w', newline='') as f:
	    w = csv.DictWriter(f, keys)
	    w.writeheader()
	    w.writerows(cases)