Last active
November 14, 2019 19:58
-
-
Save stucka/db2557a9e1a2e075dd58102ecf560944 to your computer and use it in GitHub Desktop.
Python -- better parsing of fixed-width data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Preview your column widths easier with regex101.com -- makes it so much easier
from collections import OrderedDict

# Field name -> column width, listed in the order the columns appear in a row.
headers = OrderedDict([
    ("id", 5),
    ("name", 25),
    ("attribute", 8),
])

# One "(.{width})" capture group per field, so the pattern can be pasted into
# regex101.com next to a sample line to check the widths visually.
myregex = "".join("(.{" + str(width) + "})" for width in headers.values())
print("Test at regex101.com:\r\n\t\t" + myregex)

print("How your stuff lines up, with starting position of 1:")
# Longest field name, used to pad the names so the numbers line up in a column.
largestitem = max((len(item) for item in headers), default=0)
index = 1  # 1-based starting column of the current field
for item in headers:
    # name, padding, first column, tab, last column (inclusive)
    print(f"{item}{((largestitem - len(item)) + 3) * ' '}{index}\t{headers[item] - 1 + index}")
    index += headers[item]

# NOTE: `mydata` and `DoSomething` are not defined in this snippet. They look
# like placeholders for your own iterable of text rows and your own per-row
# handler -- supply both before running.
for row in mydata:
    line = OrderedDict()
    counter = 0  # 0-based offset of the current field within the row
    for item in headers:
        # Slicing past the end of a short row yields "", so missing trailing
        # fields come back empty instead of raising.
        line[item] = row[counter:headers[item] + counter].strip()
        counter += headers[item]
    DoSomething(line)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sample conversion of a fixed-width text file to CSV
# For Python 3.
from collections import OrderedDict
import csv

sourcefile = "source.txt"
targetfile = "target.csv"

# Define your fields and column widths here
headers = OrderedDict([
    ("id", 5),
    ("name", 25),
    ("attribute", 8),
])


def build_preview_regex(widths):
    """Return a regex with one ``(.{width})`` capture group per field.

    Args:
        widths: Mapping of field name -> column width, in column order.

    Returns:
        The pattern as a string, suitable for pasting into regex101.com.
    """
    return "".join("(.{" + str(width) + "})" for width in widths.values())


def print_layout(widths):
    """Print each field name with its 1-based first and last column.

    Args:
        widths: Mapping of field name -> column width, in column order.
    """
    print("How your stuff lines up, with starting position of 1:")
    # Pad every name to the longest one (plus 3 spaces) so the numbers align.
    longest = max((len(name) for name in widths), default=0)
    start = 1
    for name, width in widths.items():
        print(f"{name.ljust(longest + 3)}{start}\t{width - 1 + start}")
        start += width


def parse_fixed_width_row(row, widths):
    """Slice one fixed-width text line into stripped field values.

    Args:
        row: A single line of text from the source file.
        widths: Mapping of field name -> column width, in column order.

    Returns:
        An OrderedDict mapping each field name to its whitespace-stripped
        value. Fields that fall beyond the end of a short line come back as
        empty strings, because slicing past the end of a str yields "".
    """
    line = OrderedDict()
    offset = 0
    for name, width in widths.items():
        line[name] = row[offset:offset + width].strip()
        offset += width
    return line


def convert_fixed_width_to_csv(source_path, target_path, widths):
    """Write every line of a fixed-width file to a CSV file with a header row.

    Args:
        source_path: Path of the fixed-width text file to read.
        target_path: Path of the CSV file to create (overwritten if present).
        widths: Mapping of field name -> column width, in column order.
    """
    # newline="" lets the csv module control line endings itself.
    with open(target_path, "w", newline="") as targetfilehandle, \
            open(source_path, "r") as sourcefilehandle:
        writer = csv.writer(targetfilehandle)
        writer.writerow(list(widths.keys()))  # Write the CSV header
        # Iterate the file directly rather than readlines() so large inputs
        # are streamed instead of loaded into memory at once.
        for row in sourcefilehandle:
            line = parse_fixed_width_row(row, widths)
            # Write the parsed fields (the original wrote from the raw `row`
            # string, which has no .values()).
            writer.writerow(list(line.values()))


def main():
    """Print the layout preview, then convert `sourcefile` to `targetfile`."""
    print("Test at regex101.com:\r\n\t\t" + build_preview_regex(headers))
    print_layout(headers)
    convert_fixed_width_to_csv(sourcefile, targetfile, headers)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment