Created
October 31, 2012 06:13
-
-
Save elprup/3985285 to your computer and use it in GitHub Desktop.
Copy plain log files into per-date category folders (dt=YYYY-MM-DD) in S3.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
''' | |
s3load | |
Version 0.2 2012-11-09 | |
copy plain log to category log in s3 system | |
usage: | |
s3load.py source destination | |
''' | |
import re | |
import os | |
import sys | |
import datetime | |
import logging | |
# Lower the root logger threshold so the debug/info messages emitted by the
# functions below are not filtered out.
logging.getLogger().setLevel(logging.DEBUG)
def get_file_date(file_path):
    '''
    Get the log file date from the file name.

    The name must end in -<year>-<month>-<day>_<digits>.lzo,
    eg. filename-2012-11-11_0000.lzo; whatever precedes that suffix
    (directories, an s3:// prefix, ...) is ignored.
    You can define this function by yourself to support another
    naming scheme.

    Returns a datetime.date, or None when the name does not match the
    pattern or the matched numbers are not a valid calendar date.
    '''
    # Raw string: '\.' is not a valid escape in a plain string literal.
    file_info = re.match(
        r'^.*-([0-9]+)-([0-9]+)-([0-9]+)_[0-9]+\.lzo$', file_path)
    logging.debug('filecheck: %s, got result %s', file_path, file_info)
    if file_info is None:
        logging.debug('filecheck: fail to fetch date')
        return None
    # The groups only contain digits, so int() cannot fail here.
    year, month, day = [int(part) for part in file_info.groups()]
    try:
        return datetime.date(year, month, day)
    except (ValueError, OverflowError):
        # The digits matched but are not a real date (month 13, Feb 30,
        # year 0, absurdly long numbers, ...): treat like a non-match.
        logging.debug('filecheck: fail to initial year,month,day')
        return None
def get_file_list(path):
    '''
    List the files under an S3 path, names only (no directory part).

    Runs "s3cmd ls <path> -r" and, for every output line that has at
    least four whitespace-separated fields, keeps the last '/'-separated
    component of the fourth field. Shorter lines are skipped.

    Returns a list of file names in the order s3cmd printed them.
    '''
    cmd = 's3cmd ls %s -r' % path
    file_list = []
    # "with" closes the pipe even if reading fails part-way through.
    with os.popen(cmd) as response:
        # Iterate the pipe directly; xreadlines() only exists on Python 2.
        for line in response:
            fields = line.split()
            if len(fields) < 4:
                continue
            file_list.append(fields[3].split('/')[-1])
    return file_list
def s3_copy(src, dst):
    '''
    Copy src to dst by running "s3cmd cp <src> <dst>".

    The copy is unconditional: this function does not check whether the
    destination already exists. The command line is logged at INFO
    level, and a non-zero exit status is logged as an error instead of
    being silently dropped.
    '''
    cmd = 's3cmd cp %s %s' % (src, dst)
    logging.info(cmd)
    status = os.system(cmd)
    if status != 0:
        logging.error('command failed with status %s: %s', status, cmd)
def main(argv):
    '''
    Copy every dated log file missing from the destination.

    argv is [source_root, dest_root] (s3load.py source_root dest_root).
    File names listed under source_root but not under dest_root are
    copied to <dest_root>/dt=YYYY-MM-DD/, where the date comes from
    get_file_date(); names without a recognisable date are skipped.

    Exits with a usage message when an argument is missing or empty.
    '''
    if len(argv) < 2 or not argv[0] or not argv[1]:
        sys.exit('usage: s3load.py source destination')
    source_root, dest_root = argv[0], argv[1]
    if not source_root.endswith('/'):
        source_root += '/'
    if not dest_root.endswith('/'):
        dest_root += '/'
    src_files = get_file_list(source_root)
    dst_files = get_file_list(dest_root)
    # Sorted so the copy order (and the log output) is reproducible.
    for name in sorted(set(src_files) - set(dst_files)):
        file_path = source_root + name
        file_date = get_file_date(file_path)
        if file_date is None:
            continue
        # isoformat() yields YYYY-MM-DD for every valid date, including
        # years that strftime() rejects on Python 2.
        s3_copy(file_path, '%sdt=%s/' % (dest_root, file_date.isoformat()))
if __name__ == '__main__':
    # Drop the program name: main() expects only [source, destination].
    # Passing main()'s return value to sys.exit lets it set the exit code.
    sys.exit(main(sys.argv[1:]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment