Skip to content

Instantly share code, notes, and snippets.

@mgax
Created April 16, 2010 14:15
Show Gist options
  • Save mgax/368449 to your computer and use it in GitHub Desktop.
Save mgax/368449 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
""" Read a tab-separated CSV file and print its contents, padding values so
they align into pretty columns. """
import csv
from collections import defaultdict
from optparse import OptionParser
# Command-line interface: exactly one positional FILE argument plus the
# options below.  Short flags, destinations and defaults are unchanged; the
# usage line, help texts and long option names make ``--help`` informative.
option_parser = OptionParser(
    usage="%prog [options] FILE",
    description="Print a delimiter-separated file as aligned columns.",
)
option_parser.add_option(
    "-e", "--encoding", dest="encoding", default="utf-8",
    help="text encoding used to decode the file and encode the output "
         "[default: %default]",
)
option_parser.add_option(
    "-s", "--separator", dest="separator", default="\t",
    help="field delimiter, a single character [default: tab]",
)
def pretty_print(datafile, encoding, separator, max_scan_rows=10000):
    """Print the delimited rows of *datafile* as aligned, padded columns.

    The file is read twice: a first pass measures the widest value of each
    column, then the file is rewound and every row is written to standard
    output with each value left-justified to its column width.  Each cell is
    followed by one padding space and cells are joined by one more space.

    Args:
        datafile: Seekable file object yielding the lines of the data.
        encoding: Codec used to decode byte-string cells.  On Python 2,
            where standard output takes byte strings, it is also used to
            encode the output.
        separator: One-character field delimiter handed to ``csv.reader``.
        max_scan_rows: Number of leading rows measured when deciding the
            column widths; ``None`` measures the whole file.  Wider values
            in later rows are still printed in full, they merely overflow
            their column.
    """
    import sys
    from itertools import islice

    def normalize(value):
        # Python 2's csv module yields byte strings, Python 3's yields text.
        if isinstance(value, bytes):
            value = value.decode(encoding)
        # A literal tab inside a cell would break the visual alignment.
        return value.replace(u'\t', u' ' * 4)

    # First pass: decide on column widths.  The cap is applied to the number
    # of rows; the previous check looked at the column index instead, so it
    # never limited long files and failed when the first row was empty.
    widths = {}
    measured_rows = islice(csv.reader(datafile, delimiter=separator),
                           max_scan_rows)
    for row in measured_rows:
        for column, value in enumerate(row):
            widths[column] = max(widths.get(column, 0), len(normalize(value)))

    datafile.seek(0)

    # Second pass: print the actual data.
    encode_output = str is bytes  # true only on Python 2
    write = sys.stdout.write
    for row in csv.reader(datafile, delimiter=separator):
        cells = [
            u'%-*s ' % (widths.get(column, 0), normalize(value))
            for column, value in enumerate(row)
        ]
        text = u' '.join(cells) + u'\n'
        write(text.encode(encoding) if encode_output else text)
def main():
    """Command-line entry point.

    Parses the command line with the module-level ``option_parser``, insists
    on exactly one positional argument (the file name) and pretty-prints
    that file using the selected encoding and separator.
    """
    opts, positional = option_parser.parse_args()
    if len(positional) != 1:
        option_parser.error("Please specify one file name")

    source_path = positional[0]
    with open(source_path, 'rt') as handle:
        pretty_print(handle, opts.encoding, opts.separator)
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment