Last active
January 2, 2018 21:41
-
-
Save danzek/67186feb66094de4c635 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python -tt | |
# -*- coding: utf-8 -*- | |
""" | |
Parses Internet history artifacts from a default EnCase 7 HTML report into a nicer-looking table, given the
starting line number of the artifacts in the HTML output (and an optional ending line number). Be sure to
customize the fields and field widths as needed.
Copyright 2015, Dan O'Day ([email protected]) | |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE | |
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR | |
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR | |
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
""" | |
import codecs | |
import re | |
import sys | |
__author__ = "Dan O'Day" | |
__license__ = "GNU General Public License" | |
__status__ = "Development" | |
def write_string_to_file(s):
    """
    Append a string to the final report file (report.html in the current working directory, UTF-8 encoded)

    :param s: string to write to file
    """
    report = codecs.open('report.html', mode='a+', encoding='utf-8')
    try:
        report.write(s)
    finally:
        # always release the handle, even if the write fails
        report.close()
def write_record(record, columns, widths):
    """
    Given Internet artifact record, column header names, and column widths, appends the record to the report file
    (report.html) as a single HTML table row

    :param record: dictionary containing Internet artifact row data
    :param columns: list containing names of header titles for each column, must match dictionary key names of record
    :param widths: list containing cell widths for each column, in the same order as columns
    :raises KeyError: if a column name is not a key of record
    :raises IndexError: if widths has fewer entries than columns
    """
    row_open = '<tr valign="top">'
    field_close = '</div></td>'
    row_close = '</tr>'

    cells = []
    # Pair each column with its width by position. Looking the width up with columns.index() rescans the list for
    # every cell and returns the first match, so a repeated column title would get the wrong width.
    for position, column in enumerate(columns):
        width = widths[position]
        field_open = '<td class="tab" style="width:{}"><div class="IC" style="width:{}">'.format(width, width)
        value = record[column]
        # empty/missing values still need a cell so the row keeps its column alignment
        cells.append(field_open + (value if value else ' ') + field_close)

    # build the complete row first so a bad record never leaves a half-written row in the report
    row = ''.join([row_open] + cells + [row_close])
    with codecs.open('report.html', encoding='utf-8', mode='a+') as of:
        of.write(row)
def get_line_count(file_name):
    """
    Get line count of file

    :param file_name: name of file to count lines from
    :return: number of lines in the file (0 for an empty file); a final line without a trailing newline is counted
    """
    # Count in binary mode: only newline bytes matter here, so there is no need to decode the content (and no risk
    # of a decode error from the platform default encoding).
    with open(file_name, 'rb') as f:
        # Summing one per line gives the real count. Returning the last enumerate() index was one short and
        # raised UnboundLocalError when the file was empty.
        return sum(1 for _ in f)
def process_artifacts(file_name, start_line, end_line=None):
    """
    Process Internet artifacts from EnCase 7 html report

    Lines before start_line are copied through as the report header (with an extra ``.tab`` CSS rule injected after
    the opening style tag), lines from start_line through end_line are parsed into records that are written as table
    rows, and lines after end_line are copied through as the footer. Output is appended to the report file through
    write_string_to_file() and write_record(); a progress bar is drawn on stdout while the input is read.

    :param file_name: name of file containing Internet artifacts from EnCase 7 html report
    :param start_line: starting line number (1-based) of artifacts, should not include title/heading
    :param end_line: optional ending line number (1-based, inclusive) of artifacts, must be supplied if additional
                     content is presented in the html report after the Internet artifacts; when omitted, everything
                     from start_line to the end of the file is parsed and '</body></html>' is used as the footer
    """
    # header and footer placeholders, line count (line count is only used to scale the progress bar)
    header_parts = []
    footer_parts = []
    if end_line:
        line_count = end_line
    else:
        footer_parts.append('</body></html>')
        line_count = get_line_count(file_name)

    table_header = '<div style="font-size:9pt;color:#000000;padding-left:15pt;"><table border="1" cellspacing="0" ' \
                   'cellpadding="0">'
    table_footer = '</table></div>'
    tab_css = '.tab {font-family:"Calibri";font-size:10pt;margin:0px;padding:2;word-wrap:break-word;' \
              'overflow:hidden;border-color:black;border-style:solid}'

    # generate table header row
    table_title_template_open = '<td class="tab" align="center" style="background-color:#bebebe;color:#000000;'
    table_title_template_close = '</div></td>'
    columns = ['#', 'Source', 'Item Path', 'Type', 'Visit Count', 'URL Name', 'Record Last Accessed',
               'Internet Artifact Type', 'Title', 'Browser Type', 'Profile']
    widths = ['38', '75', '100', '25', '40', '200', '75', '100', '150', '100', '60']
    table_title_row = ''.join(
        table_title_template_open +
        'width:{}"><div class="IC" style="width:{}">'.format(width, width) + title + table_title_template_close
        for title, width in zip(columns, widths))

    # regex patterns for data types
    new_record_pattern = re.compile(r'>(\d+)\)')
    # NOTE(review): this pattern starts with a literal space as it appears in this copy of the file; it may have
    # been a non-breaking space or an HTML entity in the author's original -- confirm against a real report.
    source_pattern = re.compile(r' (.+)<br/>')
    # (marker that identifies the line, record key to fill, pattern extracting the value); order matters because
    # only the first marker found in a line is used
    field_patterns = [
        ('>Item Path', 'Item Path', re.compile(r'>Item Path\t(.*)<br/>')),
        ('>Type</div>', 'Type', re.compile(r'>Type</div>.*class="IC".*>(.*)</div></td>')),
        ('>Visit Count<', 'Visit Count', re.compile(r'>Visit Count<.*class="IC".*>(.*)</div></td>')),
        ('>Url Name<', 'URL Name', re.compile(r'>Url Name</div>.*class="IC".*>(.*)</div></td>')),
        ('>Record Last Accessed<', 'Record Last Accessed',
         re.compile(r'>Record Last Accessed</div>.*class="IC".*>(.*)</div></td>')),
        ('>Internet Artifact Type<', 'Internet Artifact Type',
         re.compile(r'>Internet Artifact Type</div>.*class="IC".*>(.*)</div></td>')),
        ('>Title<', 'Title', re.compile(r'>Title</div>.*class="IC".*>(.*)</div></td>')),
        ('>Browser Type<', 'Browser Type', re.compile(r'>Browser Type</div>.*class="IC".*>(.*)</div></td>')),
        ('>Profile Name<', 'Profile', re.compile(r'>Profile Name</div>.*class="IC".*>(.*)</div></td>')),
    ]

    current_record = {k: None for k in columns}
    current_record_number = 0
    table_started = False
    last_percent = None

    with codecs.open(file_name, 'r', "utf-8") as f:
        # iterate through each line
        for ln, line in enumerate(f):
            line_no = ln + 1  # start_line / end_line are 1-based, enumerate() is 0-based

            # progress bar: clamp to 100% (input may continue past end_line) and avoid dividing by zero
            if line_count > 0:
                percent_complete = min(100, (line_no * 100) // line_count)
            else:
                percent_complete = 100
            if percent_complete != last_percent:  # redraw only when the displayed value changes
                filled = percent_complete // 2
                sys.stdout.write('\r[' + ('|' * filled) + ('.' * (50 - filled)) +
                                 '] {}%'.format(percent_complete))
                sys.stdout.flush()
                last_percent = percent_complete

            if line_no < start_line:
                header_parts.append(line)
                # add custom css class for report
                if '<style type="text/css">' in line:
                    header_parts.append(tab_css)
                continue

            if end_line and line_no > end_line:
                # everything after the artifacts is passed through untouched
                footer_parts.append(line)
                continue

            if not table_started:
                # write initial report data before the first artifact line is handled
                write_string_to_file(''.join(header_parts))
                write_string_to_file(table_header)
                write_string_to_file(table_title_row)
                table_started = True

            if new_record_pattern.search(line):  # test for new record
                # write old record to file
                if current_record_number > 0:
                    write_record(current_record, columns, widths)

                # begin new record
                current_record_number += 1  # increment current record counter
                current_record = {k: None for k in columns}  # initialize temp dictionary
                current_record['#'] = str(current_record_number)
                source_match = source_pattern.search(line)
                if source_match:  # leave the cell empty rather than crash on an unexpected line layout
                    current_record['Source'] = source_match.group(1)
                continue

            for marker, key, pattern in field_patterns:
                if marker in line:
                    field_match = pattern.search(line)
                    if field_match:  # marker present but layout unexpected: keep the cell empty
                        current_record[key] = field_match.group(1)
                    break

    if last_percent is not None:
        sys.stdout.write('\n')  # finish the progress bar line

    if not table_started:
        # the input ended before start_line: still emit the header and an (empty) table so the output is complete
        write_string_to_file(''.join(header_parts))
        write_string_to_file(table_header)
        write_string_to_file(table_title_row)

    # flush the record still being collected, but do not emit a row that has no data at all
    if any(current_record.values()):
        write_record(current_record, columns, widths)
    write_string_to_file(table_footer)
    write_string_to_file(''.join(footer_parts))
def main():
    """
    Command line entry point: ``script REPORT_FILE START_LINE [END_LINE]``

    Validates the arguments and hands them to process_artifacts(). Prints a message and exits with status 1 when the
    required arguments are missing or a line number is not an integer.

    :return: None
    """
    # check for CLI parameters (testing the length avoids an IndexError when arguments are missing)
    if len(sys.argv) < 3:
        print('you need to specify a report file and starting line (and optional ending line)')
        sys.exit(1)

    # validate starting line number
    try:
        start_line = int(sys.argv[2])
    except ValueError:
        print('not a valid number')
        sys.exit(1)

    # check for optional end line parameter and validate if present
    if len(sys.argv) > 3:
        # only the conversion is guarded, so errors raised while processing are not misreported as a bad number
        try:
            end_line = int(sys.argv[3])
        except ValueError:
            print('not a valid number')
            sys.exit(1)
        process_artifacts(sys.argv[1], start_line, end_line)
    else:
        process_artifacts(sys.argv[1], start_line)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment