Sync logs from S3
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Download new log files from S3.

Only new files are downloaded; they are compressed with gzip before being
stored in the local archive directory.
"""
from __future__ import print_function

import argparse
import gzip
import os
import shutil
import subprocess
import tempfile

from contextlib import contextmanager


@contextmanager
def cd(path):
    """Temporarily change the working directory."""
    cwd = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(cwd)


class LogSync(object):

    def __init__(self, s3_path, config_file, logs_path):
        self.s3_path = s3_path
        self.config_file = config_file
        self.logs_path = logs_path
        self._tmpdir = None

    @property
    def tmpdir(self):
        # Lazily create the temporary directory used for the downloads
        if not self._tmpdir:
            self._tmpdir = tempfile.mkdtemp()
            print('Downloading temporary files in %s' % self._tmpdir)
        return self._tmpdir

    def sync(self):
        # List the objects under the S3 prefix
        # (universal_newlines=True so the output is text on Python 2 and 3)
        s3output = subprocess.check_output(['s3cmd', '-c', self.config_file,
                                            'ls', self.s3_path],
                                           universal_newlines=True)
        # Each 's3cmd ls' line looks like "date time size s3://path";
        # keep the path of every non-empty object
        s3files = (line.split()[3] for line in s3output.split('\n')
                   if line and line.split()[2] != '0')
        files = [(s3file, s3file.replace(self.s3_path, ''))
                 for s3file in s3files]
        for s3file, filename in files:
            compressed_name = filename + '.gz'
            archive_dest = os.path.join(self.logs_path, compressed_name)
            # Skip files that have already been archived
            if os.path.exists(archive_dest):
                continue
            with cd(self.tmpdir):
                s3output = subprocess.check_output(['s3cmd',
                                                    '-c',
                                                    self.config_file,
                                                    'get', s3file],
                                                   universal_newlines=True)
                print(s3output.strip())
                print('Compressing file to %s' % compressed_name)
                # Gzip the downloaded file, then move the archive to its
                # final destination and drop the uncompressed copy
                with open(filename, 'rb') as fh:
                    with gzip.open(compressed_name, 'wb') as gz:
                        gz.writelines(fh)
                shutil.move(compressed_name, archive_dest)
                os.remove(filename)
                print('Got a new archived log: %s' % archive_dest)


if __name__ == '__main__':

    class S3URL(argparse.Action):
        def __call__(self, parser, args, values, option_string=None):
            # Ensure the S3 prefix ends with a trailing slash
            if not values.endswith('/'):
                values += '/'
            setattr(args, self.dest, values)

    class AbsolutePath(argparse.Action):
        def __call__(self, parser, args, values, option_string=None):
            values = os.path.abspath(values)
            setattr(args, self.dest, values)

    parser = argparse.ArgumentParser()
    group = parser.add_argument_group('Local')
    group.add_argument('-o', '--out', required=True,
                       action=AbsolutePath,
                       help="Directory where the compressed logs are stored")
    group = parser.add_argument_group('S3')
    group.add_argument('-c', '--config-file', required=True,
                       action=AbsolutePath,
                       help='S3 configuration file')
    group.add_argument('-p', '--s3-path', required=True,
                       action=S3URL,
                       help='S3 Path, example: s3://logs-erp/xyz/')
    args = parser.parse_args()

    log_sync = LogSync(args.s3_path, args.config_file, args.out)
    log_sync.sync()
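
Example usage (a sketch: the script filename, the s3cmd configuration file and the output directory below are placeholders; the S3 path is the one shown in the --s3-path help):

    python sync_s3_logs.py -c ~/.s3cfg -p s3://logs-erp/xyz/ -o /var/log/s3-archive

The script lists the objects under the S3 prefix with s3cmd, skips every file whose gzipped copy already exists in the output directory, downloads the remaining ones into a temporary directory, compresses them and moves the resulting .gz archives into the output directory. Because already-archived files are skipped, it can safely be re-run, for instance from a cron job.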