Created
November 13, 2019 20:03
-
-
Save rochacbruno/059e1223f5b69d3b99b509a4962cf3aa to your computer and use it in GitHub Desktop.
Read fixed width text file in Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import struct | |
from operator import itemgetter | |
from pathlib import Path | |
def get_struct_unpacker(fieldspecs, istart, iwidth):
    """
    Build a struct-based unpacker for fixed-width records.

    fieldspecs is a list of [name, start, width, ...] arrays, ordered by
    start. Start positions are 1-based. istart and iwidth are the indexes
    of the start and width entries inside each fieldspec.

    Returns the bound ``unpack_from`` method of a compiled ``struct.Struct``
    whose format looks like "6s2x14s1x13s": one "s" item per field plus an
    "x" (pad) item for every gap between fields. Calling it with a bytes
    buffer yields a tuple with one bytes value per field, in fieldspec order.

    Raises ValueError when a field starts before the previous field ends
    (overlapping or unsorted fieldspecs, or a start position below 1),
    because the resulting format would read that field from the wrong
    offset.
    """
    fmt_parts = []
    consumed = 0  # number of bytes already described by the format
    for fieldspec in fieldspecs:
        start = fieldspec[istart] - 1  # convert the 1-based start to an offset
        width = fieldspec[iwidth]
        if start < consumed:
            raise ValueError(
                "field %r starts at offset %d, before the previous field "
                "ends at offset %d; fieldspecs must be sorted by start and "
                "must not overlap" % (fieldspec, start, consumed)
            )
        if start > consumed:
            # Skip the unused bytes between the previous field and this one.
            fmt_parts.append(str(start - consumed) + "x")
        fmt_parts.append(str(width) + "s")
        consumed = start + width
    return struct.Struct("".join(fmt_parts)).unpack_from
# Layout of one fixed-width record. Start positions are 1-based.
fieldspecs = [
    # Name, Start, Width, Type
    ["ID", 1, 6, int],
    ["NAME", 9, 14, str],
    ["Twitter", 24, 13, str],
]
iname, istart, iwidth, itype = 0, 1, 2, 3  # field indexes

# The unpacker expects the fields in ascending start order.
fieldspecs.sort(key=itemgetter(istart))
struct_unpacker = get_struct_unpacker(fieldspecs, istart, iwidth)
field_indices = range(len(fieldspecs))  # kept for code that indexes fields

# Number of bytes the unpacker needs: the end offset of the last field.
record_len = max(spec[istart] - 1 + spec[iwidth] for spec in fieldspecs)

data = []
# The context manager closes the file even if a record fails to parse.
with Path('./data.txt').open() as source:
    for line in source:
        line = line.rstrip("\r\n")
        if not line.strip():
            continue  # a blank line holds no record
        # Pad short lines (e.g. trailing spaces trimmed) so the unpacker
        # always receives a full-length buffer; values are stripped below.
        # NOTE: offsets are counted in encoded bytes, so multi-byte
        # characters shift the columns that follow them.
        raw_fields = struct_unpacker(line.encode().ljust(record_len))
        line_data = {}
        for fieldspec, raw_value in zip(fieldspecs, raw_fields):
            cast = fieldspec[itype]
            line_data[fieldspec[iname]] = cast(raw_value.decode().strip())
        data.append(line_data)
print(data)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
123456 Bruno Rocha @rochacbruno | |
456789 Rodolfo Viana @rodolfoviana |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[{'ID': 123456, 'NAME': 'Bruno Rocha', 'Twitter': '@rochacbruno'}, {'ID': 456789, 'NAME': 'Rodolfo Viana', 'Twitter': '@rodolfoviana'}] |
Nice work. I would like to use your code to search a fixed-width file that has up to 10,0000 rows of records. I want to enter a search string at the prompt and have the script return the values found at specific positions on a record line.
Is there a way to read a specific record line in the file? Let's say I want to read the record at line 110, or extract a value from position 129 on line 110.
Thank you.
Fulton
I'm new to Python, but I'd like to see how to take this result and insert it into an MSSQL table. I have a file with over 1,600 columns, for which I can get the start position and width. If this code can read it, my question is: what should I do or use in order to insert the data into a table? TIA
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
muito bom!