Created
August 17, 2014 06:08
-
-
Save chrisparnin/bc558a27256d673cf5ba to your computer and use it in GitHub Desktop.
Export Python Tutor Apache log to CSV
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import hashlib
import os
import re
import sys
import tempfile
import urllib.parse
from subprocess import STDOUT, CalledProcessError, TimeoutExpired, check_output
# Markers that delimit the fields extracted from each log line.
# The submitted code is the query string between SCRIPT_START and SCRIPT_END.
SCRIPT_START = ".py?user_script="
SCRIPT_END = ' HTTP/1.0"'
# The client IP sits between IP_START and IP_END.
# NOTE(review): the ".gz:" prefix presumably comes from grepping gzipped
# logs with file names shown -- confirm against the real input.
IP_START = ".gz:"
IP_END = " - - ["
# The request timestamp is the bracketed text right after the IP.
TIME_START = " - - ["
TIME_END = "]"

# Interpreters used when a snippet is re-executed locally.
PYTHON2_PATH = "C:/Python27/python.exe"
PYTHON3_PATH = "C:/tools/python/python.exe"
def _parseLogLine(line):
    """Extract one ``(id, time, version, snippet)`` row from a log line.

    Raises:
        ValueError: if any of the expected field markers is missing.
        KeyError: if the query string carries no ``user_script`` value.
    """
    scriptStart = line.index(SCRIPT_START)
    # Every closing marker is searched for *after* its opening marker and
    # with index() rather than find(): a missing marker now raises instead
    # of yielding -1 and silently slicing a corrupt field.
    scriptEnd = line.index(SCRIPT_END, scriptStart)
    ipStart = line.index(IP_START) + len(IP_START)
    ipEnd = line.index(IP_END, ipStart)
    timeStart = line.index(TIME_START, ipEnd) + len(TIME_START)
    timeEnd = line.index(TIME_END, timeStart)

    # The fragment starts at ".py?..."; prefixing "/" makes it a path that
    # urlparse can split into path and query.
    urlFragment = line[scriptStart:scriptEnd]
    parsed = urllib.parse.urlparse("/" + urlFragment)
    snippet = urllib.parse.parse_qs(parsed.query)['user_script'][0]

    # The raw IP is never stored; rows are keyed by its MD5 hex digest.
    ip = line[ipStart:ipEnd]
    newId = hashlib.md5(ip.encode('utf-8')).hexdigest()

    return (newId, line[timeStart:timeEnd], getPythonVersion(line), snippet)


def importFile(fileName):
    """Read an access log and return the submitted code snippets.

    Lines without a ``user_script`` request, or with an empty one, are
    skipped. Lines that cannot be parsed are reported on stdout and skipped,
    so one bad line never aborts the import.

    Args:
        fileName: path of the log file to read.

    Returns:
        A list of ``(id, time, version, snippet)`` tuples, where ``id`` is
        the MD5 hex digest of the client IP, ``time`` is the bracketed
        timestamp text, ``version`` is "2", "3" or "" and ``snippet`` is the
        URL-decoded ``user_script`` value.
    """
    rows = []
    # errors="replace": a single undecodable byte should not abort the
    # whole import.
    with open(fileName, errors="replace") as f:
        # Iterate lazily instead of loading the whole log with readlines().
        for line in f:
            # Blank entries.
            if ".py?user_script=&" in line:
                continue
            if SCRIPT_START not in line:
                continue
            try:
                rows.append(_parseLogLine(line))
            except (ValueError, KeyError, IndexError) as err:
                # Same report format as before (exception class, then line),
                # but KeyboardInterrupt/SystemExit are no longer swallowed.
                print(type(err), "%", line, "%")
    return rows
def exportAsCSV(rows, destFile):
    """Write *rows* to *destFile* as UTF-8 CSV with a header line.

    Args:
        rows: iterable of ``(id, time, version, snippet)`` sequences.
        destFile: path of the CSV file to create or overwrite.
    """
    # newline="" hands line-ending control to the csv module. Without it,
    # text mode on Windows would rewrite the "\n" terminator and any newline
    # embedded in a snippet to "\r\n".
    with open(destFile, 'w', encoding="utf-8", newline="") as output:
        writer = csv.writer(output, delimiter=',', dialect=csv.excel, lineterminator='\n')
        writer.writerow(("id", "time", "version", "snippet"))
        writer.writerows(rows)
def createAndExecuteTempFile(content, pythonPath):
    """Run *content* as a script with the interpreter at *pythonPath*.

    The code is written to a temporary file, which is removed again before
    returning.

    Args:
        content: source code to execute.
        pythonPath: path of the interpreter executable; an empty path means
            "no interpreter known" and nothing is executed.

    Returns:
        The combined stdout/stderr of the run as bytes, or None when the
        script exits with a non-zero status, runs longer than 5 seconds, or
        the interpreter cannot be launched.
    """
    if not pythonPath:
        return None

    fileTemp = tempfile.NamedTemporaryFile(mode='w+t', delete=False)
    name = fileTemp.name
    try:
        # Leaving the with-block closes (and flushes) the file even when the
        # write fails, so the os.remove() below is not attempted on an open
        # handle.
        with fileTemp:
            fileTemp.write(content)
        try:
            return check_output([pythonPath, name], stderr=STDOUT, timeout=5)
        except (CalledProcessError, TimeoutExpired, OSError):
            # A timeout or an unlaunchable interpreter is treated like a
            # failing script instead of propagating to the caller.
            return None
    finally:
        os.remove(name)
# Figure out if should be executing python 2 or 3
def getPythonPath(urlFragment):
    """Return the interpreter path matching the endpoint named in the text.

    Args:
        urlFragment: text containing the logged request, e.g. a full log line.

    Returns:
        PYTHON2_PATH for a ``web_exec_py2-1.py`` request, PYTHON3_PATH for a
        ``web_exec_py3-1.py`` request, and "" when neither is present.
    """
    if "\"GET /web_exec_py2-1.py?" in urlFragment:
        return PYTHON2_PATH
    if "\"GET /web_exec_py3-1.py?" in urlFragment:
        return PYTHON3_PATH
    return ""
def getPythonVersion(urlFragment):
    """Return the Python major version of the endpoint named in the text.

    Args:
        urlFragment: text containing the logged request, e.g. a full log line.

    Returns:
        "2" for a ``web_exec_py2-1.py`` request, "3" for a
        ``web_exec_py3-1.py`` request, and "" when neither is present.
    """
    if "\"GET /web_exec_py2-1.py?" in urlFragment:
        return "2"
    if "\"GET /web_exec_py3-1.py?" in urlFragment:
        return "3"
    return ""
# Script entry: <input log> <output csv>.
if len(sys.argv) == 3:
    rows = importFile(sys.argv[1])
    exportAsCSV(rows, sys.argv[2])
elif __name__ == "__main__":
    # A wrong argument count used to do nothing and exit 0; report it
    # instead. Restricted to direct execution so importing stays silent.
    print("usage: %s <apache_log> <output_csv>" % sys.argv[0], file=sys.stderr)
    sys.exit(2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment