Skip to content

Instantly share code, notes, and snippets.

@w1ndy
Created November 13, 2016 15:01
Show Gist options
  • Save w1ndy/237086863d3ef4c8f502d8e4c587ee49 to your computer and use it in GitHub Desktop.
Save w1ndy/237086863d3ef4c8f502d8e4c587ee49 to your computer and use it in GitHub Desktop.
Strip the whitespace padding around column values in CSV files, with a progress indicator
#!/usr/bin/python3
import sys
from tqdm import tqdm
from os.path import splitext, basename, dirname, abspath, join, getsize
def _trimmed_path(fname):
    """Return the output path for *fname*: ``<stem>.trim.csv`` next to the input."""
    stem = splitext(basename(fname))[0]
    return join(dirname(abspath(fname)), '%s.trim.csv' % stem)


def _trim_line(line):
    """Return *line* with whitespace stripped from every comma-separated field.

    The line terminator is stripped together with the last field, and the
    result always ends with a single newline character.
    """
    return ','.join(field.strip() for field in line.split(',')) + '\n'


def main():
    """Trim whitespace around every field of each CSV file named on the command line.

    For every path in ``sys.argv[1:]`` the trimmed copy is written to
    ``<stem>.trim.csv`` in the same directory as the input; the input file
    itself is only read.  A tqdm progress bar, sized to the input file's byte
    length, is advanced as the input is consumed.

    When no file names are given, a usage message is printed and the process
    exits.

    NOTE: lines are split on every comma, so commas inside quoted fields are
    treated as field separators as well.
    """
    if len(sys.argv) < 2:
        print('Usage: %s ...csv_files' % sys.argv[0])
        # sys.exit() rather than the exit() helper, which is only installed
        # by the site module for interactive use.
        sys.exit()

    for fname in sys.argv[1:]:
        print('Trimming %s...' % fname)
        # Both files are context-managed so the output handle is closed (and
        # flushed) even if reading or writing fails part-way through.
        with open(fname, 'r') as fin, open(_trimmed_path(fname), 'w') as fout:
            with tqdm(total=getsize(fname), unit='byte') as bar:
                consumed = 0
                for line in fin:
                    fout.write(_trim_line(line))
                    # Advance by the bytes actually pulled from the file rather
                    # than by a re-encoded length: this stays exact for any
                    # newline style or encoding and never exceeds the total.
                    # The offset moves in read-buffer-sized steps, so skip the
                    # update when it has not changed.
                    position = fin.buffer.tell()
                    if position != consumed:
                        bar.update(position - consumed)
                        consumed = position
# Run the trimmer only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment