Last active
August 29, 2015 14:20
-
-
Save Lucretiel/98b7886404d17601e7f8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from __future__ import print_function, unicode_literals | |
from tempfile import SpooledTemporaryFile as TempFile | |
from shutil import copyfileobj | |
from argparse import ArgumentParser | |
from io import open | |
def trim_white(istr):
    """Yield the lines of *istr* with trailing whitespace removed.

    Each yielded string ends in exactly one newline. Blank (or
    whitespace-only) lines are not emitted immediately: they are counted
    and only written out, as bare newlines, in front of the next
    non-blank line. Blank lines at the very end of the input are
    therefore dropped, while blank lines at the start or in the middle
    are preserved.

    Args:
        istr: Any iterable of text lines (for example an open text file).

    Yields:
        Text chunks of the form "<pending newlines><stripped line>\\n".
    """
    pending_blanks = 0
    for raw in istr:
        stripped = raw.rstrip()
        if not stripped:
            # Hold the blank line back until we know more content follows.
            pending_blanks += 1
            continue
        yield "".join(("\n" * pending_blanks, stripped, "\n"))
        pending_blanks = 0
def main(argv=None):
    """Command-line entry point: strip trailing whitespace from a file.

    Reads ``input_file`` using the requested encoding, passes its lines
    through ``trim_white`` and writes the result to ``output_file`` (or
    back over ``input_file`` when no output file is given). The cleaned
    text is staged in a spooled temporary file first, so the input is
    fully read and closed before the output is opened; this is what makes
    in-place rewriting safe.

    Args:
        argv: Optional list of command-line arguments. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``.
    """
    # Imported locally so this function carries its own dependency.
    from codecs import getincrementalencoder

    parser = ArgumentParser()
    parser.add_argument('input_file')
    parser.add_argument('output_file', nargs='?', default=None)
    parser.add_argument('-e', '--encoding', default='utf-8')
    parser.add_argument('-m', '--memory', default=1024 * 1024, type=int,
        help="The maximum amount of memory to use as a buffer, before writing "
        "to a temporary file")
    args = parser.parse_args(argv)

    input_file = args.input_file
    output_file = args.output_file or input_file
    encoding = args.encoding
    memory = args.memory

    # A single incremental encoder is used for the whole stream. Calling
    # str.encode() on every chunk would emit a byte-order mark in front of
    # each line for codecs such as utf-16, utf-32 or utf-8-sig.
    encoder = getincrementalencoder(encoding)()

    with TempFile(max_size=memory, mode='w+b') as stage:
        with open(input_file, mode='rt', encoding=encoding) as istr:
            stage.writelines(encoder.encode(line) for line in trim_white(istr))
        # Flush whatever state the encoder may still be holding.
        stage.write(encoder.encode('', final=True))
        stage.seek(0)
        with open(output_file, 'wb') as ostr:
            copyfileobj(stage, ostr)
# Run the command-line interface only when executed as a script, so that
# importing this module has no side effects.
if '__main__' == __name__:
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment