Created
February 13, 2009 09:12
-
-
Save brendano/63810 to your computer and use it in GitHub Desktop.
csv2tsv
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2.6 | |
""" | |
Input is Excel-style CSV. Either stdin or filename. | |
Output is honest-to-goodness tsv: no quoting or any \\n\\r\\t. | |
""" | |
from __future__ import print_function | |
import csv, sys | |
# Running total of warnings raised so far (including suppressed ones) and the
# maximum number of individual warnings that are actually printed.
warning_count = 0
warning_max = 50


def warning(s):
    """Report warning message *s* on stderr, capped at ``warning_max`` messages.

    Every call increments the module-level ``warning_count``.  The first
    ``warning_max`` calls print ``WARNING: <s>``.  The call that first exceeds
    the cap prints a single notice that further warnings are being
    suppressed, so truncated output is not mistaken for complete output;
    all later calls print nothing.
    """
    global warning_count
    warning_count += 1
    if warning_count <= warning_max:
        print("WARNING:", s, file=sys.stderr)
    elif warning_count == warning_max + 1:
        # Say once that the cap was hit instead of going silent.
        print("WARNING:",
              "further warnings suppressed (limit %d)" % warning_max,
              file=sys.stderr)
def cell_text_clean(text):
    """Return *text* with each tab, newline and carriage return turned into a space.

    A warning is raised through ``warning()`` once per kind of character
    found in the cell (tab, then newline, then carriage return), not once
    per occurrence.  Text containing none of these is returned unchanged.
    """
    clobbered = (
        ("\t", "Clobbering embedded tab"),
        ("\n", "Clobbering embedded newline"),
        ("\r", "Clobbering embedded carriage return"),
    )
    for char, message in clobbered:
        if char in text:
            warning(message)
            # The replacement is a plain space, so handling one character
            # can never introduce another one from the table.
            text = text.replace(char, " ")
    return text
def clean_row(row):
    """Return a new list holding ``cell_text_clean(cell)`` for each cell of *row*."""
    return [cell_text_clean(cell) for cell in row]
# Script body: choose the CSV source (a single filename argument, or piped
# stdin), then write every row to stdout as tab-separated text.
args = sys.argv[1:]
if len(args) > 1:
    # could try to enforce conformity, or meld them together, etc.
    raise Exception("No support for multiple files yet")

if args:
    # The csv module does its own newline handling.  Python 3 needs the file
    # opened with newline="" and Python 2 needs binary mode; otherwise
    # newlines embedded in quoted fields can be misread.
    if sys.version_info[0] >= 3:
        source = open(args[0], newline="")
    else:
        source = open(args[0], "rb")
elif not sys.stdin.isatty():
    source = sys.stdin
else:
    # No filename and stdin is a terminal: print the usage text and exit 1.
    print(__doc__.strip())
    sys.exit(1)

try:
    reader = csv.reader(source)
    for row in reader:
        print(*clean_row(row), sep="\t")
finally:
    # Close only what this script opened; stdin is left alone.
    if source is not sys.stdin:
        source.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment