Created
October 25, 2013 21:52
-
-
Save slotrans/7162372 to your computer and use it in GitHub Desktop.
Buffered converter for MySQL CSV exports written with \b\b\b (three backspace characters) as the line terminator, which works around MySQL's awful handling of embedded newlines. Extracted from a larger MySQL->PostgreSQL converter, so some of the variable names don't make sense.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
import sys

# Row terminator used by the export: three consecutive backspace bytes.
LINE_ENDING = b"\x08\x08\x08"
# Number of bytes read from the source file per iteration (1 MiB).
BLOCK_SIZE = 1048576


def convert_blocks(blocks):
    """Yield the given byte blocks with every LINE_ENDING turned into b"\\n".

    ``blocks`` is any iterable of byte strings that together form the input.
    A delimiter may be split across two consecutive blocks, so backspaces
    left at the end of a converted block are held back and prepended to the
    next block instead of being emitted.  The concatenation of everything
    yielded equals a single left-to-right replacement over the whole input,
    regardless of where the block boundaries fall.

    Backspaces still pending when the input ends (fewer than three, so not a
    delimiter) are yielded unchanged.
    """
    pending = b""
    for block in blocks:
        converted = (pending + block).replace(LINE_ENDING, b"\n")
        # After the replacement no run of three backspaces remains, so the
        # trailing run held back here is at most two bytes long.
        complete = converted.rstrip(b"\x08")
        pending = converted[len(complete):]
        yield complete
    if pending:
        yield pending


def _read_blocks(stream, block_size):
    """Yield successive ``block_size``-byte reads from ``stream`` until EOF."""
    while True:
        block = stream.read(block_size)
        if not block:
            return
        yield block


def main(argv):
    """Convert the file named by argv[1] into the file named by argv[2].

    Returns the process exit status: 1 when the argument count is wrong
    (after printing a usage line), 0 after a completed conversion.
    """
    if len(argv) != 3:
        sys.stdout.write("usage: {0} sourcefile targetfile\n".format(argv[0]))
        return 1

    source_path = argv[1]
    target_path = argv[2]

    with open(source_path, "rb") as source:
        with open(target_path, "wb") as target:
            # sys.stdout.write is used instead of print so the script runs
            # on both Python 2 and Python 3 with the same console output.
            sys.stdout.write("converting...")
            for converted in convert_blocks(_read_blocks(source, BLOCK_SIZE)):
                target.write(converted)
                # One progress dot per chunk written.
                sys.stdout.write(" .")
    sys.stdout.write(" done\n")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment