Last active
August 29, 2015 14:20
-
-
Save bertday/f8dd28de125007c9efab to your computer and use it in GitHub Desktop.
Convert Parking Ticket Dump to CSV
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import sys

# ---------------------------------------------------------------------------
# CONFIG
# ---------------------------------------------------------------------------

# Path of the fixed-width input dump; the CSV is written next to it with a
# '.csv' suffix appended.
IN_FILE_NAME = 'PH_CITY_EXTRACT_2014'

# Record layout, in column order. 'length' is the width of the field in
# characters; 'start'/'stop' slice indexes are added by compute_offsets().
FIELDS = [
    {'name': 'ticket', 'length': 11},
    {'name': 'issue_date', 'length': 10},
    {'name': 'issue_time', 'length': 5},
    {'name': 'state', 'length': 2},
    {'name': 'plate', 'length': 8},
    {'name': 'division', 'length': 4},
    {'name': 'location', 'length': 35},
    {'name': 'violation', 'length': 23},
    {'name': 'fine', 'length': 6},
    {'name': 'issuing_agency', 'length': 6},
]

# ---------------------------------------------------------------------------
# MAIN
# ---------------------------------------------------------------------------


def compute_offsets(fields):
    """Add 'start' and 'stop' slice indexes to each field dict, in place.

    Fields are laid out back to back: the first one starts at index 0 and
    every other one starts where the previous one stops.

    Args:
        fields: list of dicts, each with an integer 'length' key.

    Returns:
        The same list, for convenience.
    """
    position = 0
    for field in fields:
        field['start'] = position
        position += field['length']
        field['stop'] = position
    return fields


def parse_row(row, fields):
    """Split one fixed-width line into a list of stripped field values.

    Args:
        row: one line of the input file, with or without its line ending.
        fields: field dicts already processed by compute_offsets().

    Returns:
        A list with one string per field, in field order. Fields that lie
        beyond the end of a short row come back as empty strings.
    """
    # Drop only a real line terminator. Blindly chopping the last character
    # would eat a data character on a final line that has no newline.
    row = row.rstrip('\r\n')
    return [row[field['start']:field['stop']].strip() for field in fields]


def _open_csv_output(path):
    """Open `path` for csv.writer in the mode the running Python requires."""
    if sys.version_info[0] < 3:
        # Python 2's csv module writes byte strings and wants binary mode.
        return open(path, 'wb')
    # Python 3's csv module wants text mode with newline translation off.
    return open(path, 'w', newline='')


def convert(in_path, out_path, fields, progress_every=100000):
    """Convert a fixed-width text file to a fully quoted CSV file.

    Writes a header row with the field names, then one CSV row per input
    line. The input is streamed line by line, so memory use does not grow
    with the file size, and both files are closed when the function returns
    or raises.

    Args:
        in_path: path of the fixed-width input file.
        out_path: path of the CSV file to create (overwritten if present).
        fields: field dicts already processed by compute_offsets().
        progress_every: print the current row index every this many rows;
            0 or None disables progress output.

    Returns:
        The number of data rows written (the header is not counted).
    """
    row_count = 0
    with open(in_path) as in_file, _open_csv_output(out_path) as out_file:
        out_writer = csv.writer(out_file, quoting=csv.QUOTE_ALL)

        # Write headers
        out_writer.writerow([field['name'] for field in fields])

        # Loop over rows
        for i, row in enumerate(in_file):
            if progress_every and i % progress_every == 0:
                print(i)
            out_writer.writerow(parse_row(row, fields))
            row_count += 1
    return row_count


# Compute start/stop indexes once, at module level, so FIELDS is ready to use.
compute_offsets(FIELDS)


def main():
    """Convert IN_FILE_NAME to '<IN_FILE_NAME>.csv' using FIELDS."""
    convert(IN_FILE_NAME, '{}.csv'.format(IN_FILE_NAME), FIELDS)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment