Created
June 5, 2015 13:07
-
-
Save Ralnoc/36e94a5f141084a050f7 to your computer and use it in GitHub Desktop.
Extract Apache combined access log lines within a defined time range and send to either local destination or remote destination using SFTP.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/env python | |
import datetime | |
import paramiko | |
import re | |
import tempfile | |
def parse_args():
    """Parse and validate the command-line options.

    Returns:
        The ``(opts, args)`` tuple produced by ``optparse``. ``opts`` carries
        ``time_start``, ``time_end``, ``destination``, ``file_name``,
        ``server``, ``port`` (an int), ``username`` and ``password``.

    Exits with status 1 (after printing a message) when the source file or
    the destination path is missing, or when a server is given without both
    a username and a password.
    """
    import optparse
    import sys

    # Compute the date once so both defaults refer to the same day even if
    # the script happens to start right at midnight.
    today = datetime.date.today().strftime("%d/%b/%Y")

    parser = optparse.OptionParser()
    parser.add_option('--start', action='store', type='string',
                      dest='time_start',
                      default=today + ':18:00:00',
                      help='Time to start collecting logs. (Defaults to Today at 6pm)',
                      metavar="03/Jun/2015:18:00:00")
    parser.add_option('--end', action='store', type='string',
                      dest='time_end',
                      default=today + ':21:00:00',
                      help='Time to end collecting logs. (Defaults to Today at 9pm)',
                      metavar="03/Jun/2015:21:00:00")
    parser.add_option('-d', '--dest', action='store', dest='destination',
                      default=False, help='Destination /path/to/file')
    parser.add_option('-f', '--file', action='store', dest='file_name',
                      default=False, help='Source /path/to/file')
    parser.add_option('-s', '--server', action='store', dest='server',
                      default=False, help='Destination sftp server')
    # type='int' so a user-supplied port is an integer like the default;
    # without it optparse stores the raw string.
    parser.add_option('-p', '--port', action='store', type='int', dest='port',
                      default=22, help='destination sftp server port')
    parser.add_option('-u', '--username', action='store', dest='username',
                      default=False, help='destination sftp server username')
    # NOTE(review): a password passed on the command line is visible in the
    # process list; consider a key file or prompt if that matters here.
    parser.add_option('-P', '--password', action='store', dest='password',
                      default=False, help='destination sftp server password')
    (opts, args) = parser.parse_args()

    if not opts.file_name:
        print('You must specify a source file with -f, --file.')
        sys.exit(1)
    # A destination path is required in both modes: it names the local file,
    # or the remote path when -s/--server is given.
    if not opts.destination:
        print('You must specify either a destination file with -d, --dest or a' +
              ' destination server with -s, --server and a destination file with -d, --dest.')
        sys.exit(1)
    if opts.server and not (opts.username and opts.password):
        print('You must specify a username and password for the remote server with -u/--username and -P/--password.')
        sys.exit(1)
    return (opts, args)
def timeit(method):
    """Decorator that prints how long each call to *method* took.

    The wrapped callable receives the same positional and keyword arguments
    and its return value is passed through unchanged. ``functools.wraps``
    keeps the original ``__name__`` and ``__doc__`` on the wrapper.
    """
    import functools
    import time

    @functools.wraps(method)
    def timed(*args, **kw):
        ts = time.time()
        result = method(*args, **kw)
        te = time.time()
        # Only reached on success; an exception from method propagates
        # without a timing line being printed.
        print('Time to Complete function (%r): %2.5f sec' %
              (method.__name__, te - ts))
        return result
    return timed
def _copy_lines_in_window(source, sink, pattern, window_start, window_end):
    """Write to *sink* every line of *source* stamped inside the window.

    *source* yields raw byte lines; *pattern* is a compiled bytes regex with
    a ``timestamp`` group. Lines that do not match the pattern, or whose
    timestamp cannot be parsed, are skipped. Both window bounds are
    exclusive.
    """
    for line in source:
        found = pattern.match(line)
        if found is None:
            # Blank or non-access-log line: nothing to compare, so skip it.
            continue
        try:
            stamp = datetime.datetime.strptime(
                found.group('timestamp').decode('ascii'),
                "%d/%b/%Y:%H:%M:%S")
        except ValueError:
            # Looks like a timestamp but is not a valid date/time.
            continue
        if window_start < stamp < window_end:
            sink.write(line)


@timeit
def main():
    """Extract the log lines inside the requested window and deliver them.

    Matching lines are first collected in a temporary file. With
    ``--server`` the temporary file is uploaded over SFTP to the
    destination path; otherwise its content is appended to the local
    destination file. The log is handled as bytes throughout, so lines are
    copied verbatim whatever their encoding.
    """
    opts, _ = parse_args()

    # Match the start of a combined-log line and capture it in named groups.
    # The host, identd and user fields are any non-space token; the zone
    # offset may carry either sign (e.g. +0000 or -0700).
    pattern = re.compile(
        br'(?P<ipaddress>\S+) (?P<identd_tag>\S+) (?P<userid>\S+) '
        br'\[(?P<timestamp>[0-9]+/[a-zA-Z]+/[0-9]+:[0-9]+:[0-9]+:[0-9]+) [0-9+-]*\]')

    time_start_struct = datetime.datetime.strptime(opts.time_start, "%d/%b/%Y:%H:%M:%S")
    time_end_struct = datetime.datetime.strptime(opts.time_end, "%d/%b/%Y:%H:%M:%S")

    transport = None
    sftp = None
    # Temporary file (binary mode by default) holding the matching lines.
    tmp_fd = tempfile.TemporaryFile()
    try:
        if opts.server:
            # Connect before parsing so bad credentials fail fast. The port
            # is coerced because it may arrive as a string.
            transport = paramiko.Transport((opts.server, int(opts.port)))
            transport.connect(username=opts.username, password=opts.password)
            sftp = paramiko.SFTPClient.from_transport(transport)

        with open(opts.file_name, 'rb') as source:
            _copy_lines_in_window(source, tmp_fd, pattern,
                                  time_start_struct, time_end_struct)

        # Rewind so the destination step reads from the beginning.
        tmp_fd.seek(0)

        if opts.server:
            sftp.putfo(tmp_fd, opts.destination)
        else:
            # Append, so repeated runs accumulate in the same local file.
            with open(opts.destination, 'ab') as dest_fd:
                for line in tmp_fd:
                    dest_fd.write(line)
    finally:
        # Release everything even if parsing or the transfer failed.
        if sftp is not None:
            sftp.close()
        if transport is not None:
            transport.close()
        tmp_fd.close()


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment