Skip to content

Instantly share code, notes, and snippets.

@danzek
Last active January 2, 2018 21:41
Show Gist options
  • Save danzek/67186feb66094de4c635 to your computer and use it in GitHub Desktop.
Save danzek/67186feb66094de4c635 to your computer and use it in GitHub Desktop.
#!/usr/bin/python -tt
# -*- coding: utf-8 -*-
"""
Given the starting line number from the html output (and optional ending line number), parses Internet history
artifacts from a default EnCase 7 HTML report into a nicer looking table. Be sure to customize fields and field widths
as needed.
Copyright 2015, Dan O'Day ([email protected])
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
import codecs
import re
import sys
# Module metadata.
# NOTE(review): the module docstring does not name a license and only carries an "AS IS" warranty disclaimer, while
# __license__ below says GPL - confirm which license is intended.
__author__ = "Dan O'Day"
__license__ = "GNU General Public License"
__status__ = "Development"
def write_string_to_file(s):
    """
    Appends the given string to the final report file (report.html in the current directory, UTF-8 encoded)
    :param s: string to write to file
    """
    report = codecs.open('report.html', mode='a+', encoding='utf-8')
    try:
        report.write(s)
    finally:
        # always release the handle, even if the write fails
        report.close()
def write_record(record, columns, widths):
    """
    Given Internet artifact record, column header names, and column widths, appends the record to report.html as a
    single table row. A field whose value is empty or None is rendered as a non-breaking space.
    :param record: dictionary containing Internet artifact row data
    :param columns: list containing names of header titles for each column, must match dictionary key names of record
    :param widths: list containing cell widths for each column, in the same order as columns
    :raises KeyError: if a column name is not a key of record
    :raises IndexError: if widths has fewer entries than columns
    """
    cells = []
    # pair every column with its width by position; looking the width up with columns.index() returns the width of
    # the first occurrence for a repeated column name and rescans the list for every cell
    for position, column in enumerate(columns):
        width = widths[position]
        cells.append(
            '<td class="tab" style="width:{0}"><div class="IC" style="width:{0}">'.format(width)
            + (record[column] or '&nbsp;')
            + '</div></td>'
        )
    row = '<tr valign="top">' + ''.join(cells) + '</tr>'
    # build the whole row first so the report file is only held open for the write itself
    with codecs.open('report.html', encoding='utf-8', mode='a+') as of:
        of.write(row)
def get_line_count(file_name):
    """
    Get line count of file
    :param file_name: name of file to count lines from
    :return: number of lines in the file (0 for an empty file); a final line without a trailing newline is counted
    """
    # Count the lines instead of returning the last enumerate() index, which was one less than the real count and
    # left the loop variable undefined for an empty file. Binary mode avoids decoding errors: only newlines matter.
    with open(file_name, 'rb') as f:
        return sum(1 for _ in f)
# A line matching this pattern (a number followed by ")") starts a new record.
_NEW_RECORD_PATTERN = re.compile(r'>(\d+)\)')
# The source is read from the same line that starts a record.
_SOURCE_PATTERN = re.compile(r'&nbsp;(.+)<br/>')
# (marker, column, pattern) for every other record field: when marker occurs in a line, capture group 1 of pattern
# is stored under column. Order matters - only the first marker found in a line is used, like an if/elif chain.
_ARTIFACT_FIELDS = (
    ('>Item Path', 'Item Path', re.compile(r'>Item Path\t(.*)<br/>')),
    ('>Type</div>', 'Type', re.compile(r'>Type</div>.*class="IC".*>(.*)</div></td>')),
    ('>Visit Count<', 'Visit Count', re.compile(r'>Visit Count<.*class="IC".*>(.*)</div></td>')),
    ('>Url Name<', 'URL Name', re.compile(r'>Url Name</div>.*class="IC".*>(.*)</div></td>')),
    ('>Record Last Accessed<', 'Record Last Accessed',
     re.compile(r'>Record Last Accessed</div>.*class="IC".*>(.*)</div></td>')),
    ('>Internet Artifact Type<', 'Internet Artifact Type',
     re.compile(r'>Internet Artifact Type</div>.*class="IC".*>(.*)</div></td>')),
    ('>Title<', 'Title', re.compile(r'>Title</div>.*class="IC".*>(.*)</div></td>')),
    ('>Browser Type<', 'Browser Type', re.compile(r'>Browser Type</div>.*class="IC".*>(.*)</div></td>')),
    ('>Profile Name<', 'Profile', re.compile(r'>Profile Name</div>.*class="IC".*>(.*)</div></td>')),
)
# custom css class for the report cells, inserted right after the line that opens the style sheet
_TAB_CSS = '.tab {font-family:"Calibri";font-size:10pt;margin:0px;padding:2;word-wrap:break-word;' \
           'overflow:hidden;border-color:black;border-style:solid}'


def _first_group(pattern, line):
    """
    Return capture group 1 of the first match of pattern in line, or None when the line does not match
    :param pattern: compiled regular expression with at least one capture group
    :param line: line of text to search
    """
    match = pattern.search(line)
    return match.group(1) if match else None


def _draw_progress(percent):
    """
    Redraw the 50 character wide console progress bar in place
    :param percent: completion percentage, expected to be between 0 and 100
    """
    filled = percent // 2  # floor division keeps the bar width an integer on Python 2 and 3
    sys.stdout.write('\r[' + ('|' * filled) + ('.' * (50 - filled)) + '] {}%'.format(percent))
    sys.stdout.flush()


def process_artifacts(file_name, start_line, end_line=None):
    """
    Process Internet artifacts from EnCase 7 html report and append the reformatted result to report.html.
    Lines before start_line are copied through as the report header (with an extra css class added), lines from
    start_line up to and including end_line are parsed into records and written as table rows, and lines after
    end_line are copied through as the footer. Line numbers are 1-based. Progress is drawn on stdout.
    :param file_name: name of file containing Internet artifacts from EnCase 7 html report
    :param start_line: starting line number of artifacts, should not include title/heading
    :param end_line: optional ending line number of artifacts, must be supplied if additional content is presented
        in the html report after the Internet artifacts; when omitted every line from start_line on is parsed and
        '</body></html>' is used as the footer
    """
    columns = ['#', 'Source', 'Item Path', 'Type', 'Visit Count', 'URL Name', 'Record Last Accessed',
               'Internet Artifact Type', 'Title', 'Browser Type', 'Profile']
    widths = ['38', '75', '100', '25', '40', '200', '75', '100', '150', '100', '60']

    # header and footer are collected as pieces and joined once, line count drives the progress bar
    header_parts = []
    footer_parts = []
    if end_line:
        line_count = end_line
    else:
        footer_parts.append('</body></html>')
        line_count = get_line_count(file_name)

    table_header = '<div style="font-size:9pt;color:#000000;padding-left:15pt;"><table border="1" cellspacing="0" ' \
                   'cellpadding="0">'
    table_footer = '</table></div>'

    # generate table header row; the title cells are wrapped in <tr> because cells must sit inside a table row
    title_cells = [
        '<td class="tab" align="center" style="background-color:#bebebe;color:#000000;'
        'width:{0}"><div class="IC" style="width:{0}">{1}</div></td>'.format(width, title)
        for title, width in zip(columns, widths)
    ]
    table_title_row = '<tr>' + ''.join(title_cells) + '</tr>'

    current_record = dict.fromkeys(columns)
    current_record_number = 0
    table_started = False
    last_percent = None

    with codecs.open(file_name, 'r', 'utf-8') as f:
        for line_no, line in enumerate(f, 1):
            # progress bar: capped at 100% (lines past end_line used to overrun the bar), safe for a zero line
            # count, and only redrawn when the displayed value changes
            if line_count > 0:
                percent_complete = min(100, int(float(line_no) / line_count * 100))
            else:
                percent_complete = 100
            if percent_complete != last_percent:
                _draw_progress(percent_complete)
                last_percent = percent_complete

            if line_no < start_line:
                header_parts.append(line)
                # add custom css class for report
                if '<style type="text/css">' in line:
                    header_parts.append(_TAB_CSS)
                continue

            # end_line is inclusive: the first line after it already belongs to the footer (it used to be
            # treated as an artifact line and was silently dropped)
            if end_line and line_no > end_line:
                footer_parts.append(line)
                continue

            if not table_started:
                # write initial report data
                write_string_to_file(''.join(header_parts) + table_header + table_title_row)
                table_started = True

            if _NEW_RECORD_PATTERN.search(line):  # test for new record
                # write old record to file
                if current_record_number > 0:
                    write_record(current_record, columns, widths)
                # begin new record
                current_record_number += 1
                current_record = dict.fromkeys(columns)
                current_record['#'] = str(current_record_number)
                current_record['Source'] = _first_group(_SOURCE_PATTERN, line)
                continue

            for marker, column, pattern in _ARTIFACT_FIELDS:
                if marker in line:
                    # a line that carries the marker but not the expected markup leaves the field untouched
                    # instead of aborting the whole report
                    value = _first_group(pattern, line)
                    if value is not None:
                        current_record[column] = value
                    break

    if last_percent is not None:
        sys.stdout.write('\n')  # finish the progress bar line

    if not table_started:
        # start_line was never reached as an artifact line: still emit the header and an (empty) table so the
        # report stays well formed
        write_string_to_file(''.join(header_parts) + table_header + table_title_row)

    # write the last record, but not a row of blank cells when nothing was parsed at all
    if current_record_number > 0 or any(current_record.values()):
        write_record(current_record, columns, widths)
    write_string_to_file(table_footer)
    write_string_to_file(''.join(footer_parts))
def main():
    """
    main() function: reads the report file name, the starting line and the optional ending line from sys.argv,
    validates them and runs process_artifacts. Prints a message and exits with status 1 when arguments are missing
    or a line number is not an integer. An empty ending line argument is treated as not supplied.
    :return:
    """
    args = sys.argv[1:]

    # check for CLI parameters; testing the argument count (instead of indexing sys.argv) shows the usage message
    # rather than an IndexError traceback when arguments are missing
    if len(args) < 2 or not args[0]:
        print('you need to specify a report file and starting line (and optional ending line)')
        sys.exit(1)

    # validate starting line number and optional ending line number
    try:
        start_line = int(args[1])
        end_line = int(args[2]) if len(args) > 2 and args[2] else None
    except ValueError:
        print('not a valid number')
        sys.exit(1)

    # run outside of the try block so that errors raised while processing are not mistaken for bad arguments
    process_artifacts(args[0], start_line, end_line)
# run the command line entry point only when executed as a script, not when imported as a module
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment