Skip to content

Instantly share code, notes, and snippets.

@stucka
Last active November 14, 2019 19:58
Show Gist options
  • Save stucka/db2557a9e1a2e075dd58102ecf560944 to your computer and use it in GitHub Desktop.
Save stucka/db2557a9e1a2e075dd58102ecf560944 to your computer and use it in GitHub Desktop.
Python -- better parsing of fixed-width data
# Preview your column widths with regex101.com -- it makes lining things up much easier
from collections import OrderedDict
# Field names mapped to their fixed column widths, in the order the
# columns appear in each row.
headers = OrderedDict([
    ("id", 5),
    ("name", 25),
    ("attribute", 8),
])

# One capture group per field, each matching exactly that field's width.
myregex = "".join("(.{" + str(width) + "})" for width in headers.values())
print("Test at regex101.com:\r\n\t\t" + myregex)

# Show every field with its 1-based first and last column; names are padded
# to the longest field name (plus three spaces) so the numbers line up.
print("How your stuff lines up, with starting position of 1:")
largestitem = max((len(name) for name in headers), default=0)
index = 1
for name, width in headers.items():
    padding = (largestitem - len(name) + 3) * " "
    last_column = width - 1 + index
    print(f"{name}{padding}{index}\t{last_column}")
    index += width

# Cut each row into its fields by walking the widths left to right.
# `mydata` and `DoSomething` are placeholders the reader is expected to supply.
for row in mydata:
    line = OrderedDict()
    counter = 0
    for name, width in headers.items():
        line[name] = row[counter:counter + width].strip()
        counter += width
    DoSomething(line)
# Sample conversion of a fixed-width text file to CSV
# For Python 3; if using Python 2, nix the newline stuff
from collections import OrderedDict
import csv

sourcefile = "source.txt"
targetfile = "target.csv"

# Define your fields and column widths here
headers = OrderedDict([
    ("id", 5),
    ("name", 25),
    ("attribute", 8),
])


def build_regex(fields):
    """Return a regex with one fixed-width capture group per field.

    Args:
        fields: Mapping of field name to column width, in column order.

    Returns:
        A pattern string such as ``(.{5})(.{25})(.{8})``.
    """
    return "".join("(.{" + str(width) + "})" for width in fields.values())


def print_layout(fields):
    """Print each field name with its 1-based first and last column.

    Args:
        fields: Mapping of field name to column width, in column order.
    """
    print("How your stuff lines up, with starting position of 1:")
    largestitem = max((len(name) for name in fields), default=0)
    index = 1
    for name, width in fields.items():
        # Pad to the longest name (plus three spaces) so the numbers align.
        padding = (largestitem - len(name) + 3) * " "
        print(f"{name}{padding}{index}\t{width - 1 + index}")
        index += width


def parse_fixed_width_row(row, fields):
    """Split one fixed-width row into an ordered mapping of stripped values.

    Args:
        row: A single line of fixed-width text (a trailing newline is fine).
        fields: Mapping of field name to column width, in column order.

    Returns:
        An OrderedDict of field name to whitespace-stripped value. Fields
        lying beyond the end of a short row come back as empty strings.
    """
    line = OrderedDict()
    counter = 0
    for name, width in fields.items():
        line[name] = row[counter:counter + width].strip()
        counter += width
    return line


def convert_fixed_width_to_csv(source, target, fields):
    """Write every row of a fixed-width text file to a CSV file.

    Args:
        source: Path of the fixed-width input file.
        target: Path of the CSV file to create (overwritten if it exists).
        fields: Mapping of field name to column width, in column order.
    """
    # newline="" lets the csv module control line endings itself.
    with open(target, "w", newline="") as targetfilehandle:
        writer = csv.writer(targetfilehandle)
        writer.writerow(list(fields.keys()))  # Write the CSV header
        with open(source, "r") as sourcefilehandle:
            # Stream the file line by line rather than loading it all.
            for row in sourcefilehandle:
                line = parse_fixed_width_row(row, fields)
                # Write the parsed values, not the raw text row.
                writer.writerow(list(line.values()))


if __name__ == "__main__":
    myregex = build_regex(headers)
    print("Test at regex101.com:\r\n\t\t" + myregex)
    print_layout(headers)
    convert_fixed_width_to_csv(sourcefile, targetfile, headers)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment