Created
February 4, 2011 04:44
-
-
Save sansumbrella/810744 to your computer and use it in GitHub Desktop.
Patches holes in tabular data. Good for cleaning up TSV and CSV files.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# encoding: utf-8
# David Wicks
"""
patcher.py
Fills holes (empty fields) in tabular data files (TSV by default).
basic usage: python patcher.py fileToPatch.tsv
more control: python patcher.py fileToPatch.csv outputfile.csv replacementValue ,
Arguments, in order: input file, output file (default patched.tsv),
replacement value (default "empty"), field separator (default tab).
"""
import sys | |
import os | |
# for command-line execution | |
def main(argv=None):
    """Run the patcher from the command line.

    argv defaults to sys.argv. After the program name it may hold one to
    four positional arguments, which are passed straight through to
    patch() in order: input filename, output filename, replacement value,
    separator.

    A short message is printed (nothing is raised) when the argument count
    is wrong or when the file cannot be read, decoded or written.
    """
    if argv is None:
        argv = sys.argv
    args = argv[1:]

    # patch() accepts between one and four positional arguments; report any
    # other count instead of silently doing nothing.
    if not 1 <= len(args) <= 4:
        print("That didn't work. Try including a filename to load.")
        return

    try:
        patch(*args)
    except (IOError, OSError, ValueError) as err:
        # Only expected failures are handled here: missing/unwritable files
        # and bad values (e.g. undecodable text). Programming errors and
        # Ctrl-C are left to propagate so they are not hidden.
        print("That didn't work. Try including a filename to load.")
        print("(%s)" % err)
# does the patching | |
def patch(filename, outputName="patched.tsv", filler="empty", sep="\t"):
    """Fill empty fields in a delimited text file and save the result.

    filename   -- path of the file to read
    outputName -- path of the file to write; may be the same path as
                  filename, because the input is read completely before
                  the output is opened
    filler     -- text placed in every empty field
    sep        -- field separator; must be a non-empty string

    Fields are found by plain string splitting on sep, so quoted fields
    that contain the separator get no special treatment. Blank lines are
    copied through unchanged.

    Raises ValueError if sep is empty; errors from open() propagate.
    """
    if not sep:
        # An empty separator cannot delimit fields.
        raise ValueError("sep must be a non-empty string")

    # Read everything first so that outputName may equal filename.
    with open(filename, 'r') as source:
        lines = source.readlines()

    patchedLines = [_fill_holes(line, filler, sep) for line in lines]

    with open(outputName, 'w') as output:
        output.write("".join(patchedLines))

    print("Patched holes %s with '%s' and saved to %s." % (filename, filler, outputName))


def _fill_holes(line, filler, sep):
    """Return line with every empty sep-delimited field replaced by filler."""
    # Set the line terminator aside so a trailing empty field is detected
    # and the original line ending is written back untouched.
    body = line.rstrip("\r\n")
    ending = line[len(body):]
    if not body:
        # A blank line is not a row with a hole; leave it alone.
        return line
    # One split/join pass fills leading, trailing and consecutive holes and
    # always terminates, whatever filler contains (including "").
    fields = [field if field else filler for field in body.split(sep)]
    return sep.join(fields) + ending
def patchLine(line, patch):
    """Return patch when line is the empty string, otherwise line unchanged.

    NOTE(review): nothing in the visible file calls this helper.
    """
    return patch if line == '' else line
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment