Skip to content

Instantly share code, notes, and snippets.

@mumumu
Last active August 29, 2015 13:57
Show Gist options
  • Save mumumu/9658238 to your computer and use it in GitHub Desktop.
Save mumumu/9658238 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from benchmarker import Benchmarker
import os
import bz2
import glob
import fnmatch
from shutil import copyfileobj
base_dir = '/home/mumumu/bigfile_bench'
def concat_files(target_files, dest_dir):
    """Concatenate the given files into ``concated.txt`` inside ``dest_dir``.

    Args:
        target_files: Iterable of paths to the source files. They are
            appended to the output in iteration order.
        dest_dir: Directory in which ``concated.txt`` is created. An
            existing file of that name is overwritten.

    Returns:
        The path of the concatenated file.
    """
    # Honor the caller-supplied directory instead of the module-level
    # ``base_dir`` so the function does what its signature promises.
    concated_file_path = os.path.join(dest_dir, 'concated.txt')
    # Binary mode copies the bytes verbatim: no decoding, no newline
    # translation, and no failure on input that is not valid text.
    with open(concated_file_path, 'wb') as dest:
        for path in target_files:
            with open(path, 'rb') as src:
                copyfileobj(src, dest)
    return concated_file_path
def bzip2_file(target_file):
    """Compress ``target_file`` with bzip2 into ``target_file + '.bz2'``.

    The source file is left in place. An existing ``.bz2`` file of the
    same name is overwritten.

    Args:
        target_file: Path of the file to compress.
    """
    with bz2.BZ2File(target_file + '.bz2', 'w') as bz2file:
        # BZ2File.write() requires bytes, so the source must be read in
        # binary mode; text mode yields ``str`` on Python 3 and fails.
        with open(target_file, 'rb') as src:
            # Stream in fixed-size chunks rather than per line, so a file
            # without newlines is never loaded into memory in one piece.
            copyfileobj(src, bz2file)
# Benchmark script: concatenate every ``test_*.txt`` file found in
# ``base_dir`` and then bzip2-compress the result, timing each step.
if __name__ == '__main__':
    pattern = 'test_*.txt'
    target_dir = base_dir
    with Benchmarker(width=20) as bm:
        # Remove outputs left over from a previous run. The glob is
        # anchored to base_dir (where the outputs are written) rather than
        # the current working directory, so cleanup works from any cwd and
        # never touches unrelated files there.
        for stale_file in glob.glob(os.path.join(base_dir, 'concated.txt*')):
            os.remove(stale_file)
        # Sort so the concatenation order is deterministic across runs.
        target_files = sorted(
            os.path.join(target_dir, name)
            for name in os.listdir(target_dir)
            if fnmatch.fnmatch(name, pattern)
        )
        with bm('cat by pure python'):
            dest_file = concat_files(target_files, base_dir)
        with bm('bz2 by bz2.BZ2File'):
            bzip2_file(dest_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment