Last active
May 3, 2017 22:23
-
-
Save stoggi/fdd3d4d0cc1bfdc8600230444bdeb09c to your computer and use it in GitHub Desktop.
Generate start/finish times from PC startup/shutdown logs in the Windows event viewer using Pandas
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas | |
import datetime | |
import matplotlib | |
import matplotlib.pyplot as plt | |
import numpy as np | |
# Input data format: | |
# type date source id none | |
# Information 18/08/2016 8:30:31 a.m. Kernel-General 12 None | |
# Information 17/08/2016 4:51:49 p.m. Kernel-General 13 None | |
# Information 17/08/2016 8:25:02 a.m. Kernel-General 12 None | |
# Information 16/08/2016 5:11:22 p.m. Kernel-General 13 None | |
# 12 = startup, 13 = shutdown | |
# Load data from tab delimited file | |
df = pandas.DataFrame.from_csv('c:/Users/Jeremy.Stott/Desktop/data.txt', sep='\t', parse_dates=False) | |
# Example date 18/08/2016 8:30:31 a.m. format='%d%m%Y %-I:%M:%S %p.' | |
# Parse the date (day first) and then keep a copy in it's own column | |
df['date'] = pandas.to_datetime(df['date'], dayfirst=True) | |
df['datetime'] = df['date'] | |
# Set the index and sort by date | |
df = df.set_index(['date']) | |
df.sort_index() | |
# Convert the startup/shutdown id's to strings | |
df['type'] = np.where(df['id'] == 12, 'start', 'stop') | |
# Create a new dataframe to hold our results | |
df2 = pandas.DataFrame(columns=['start', 'stop', 'duration']) | |
# Resample so that we can get the max or min time for a given day | |
df2.start = df.resample('D').min()['datetime'] | |
df2.stop = df.resample('D').max()['datetime'] | |
# Calculate the delta time difference for the number of hours worked | |
df2.duration = df2.stop - df2.start | |
# Drop days where there is no value for duration | |
df2 = df2[df2['duration'] != pandas.NaT].dropna() | |
# Calculate the number of hours in the duration deltatime object | |
df2['hours'] = df2['duration'] / np.timedelta64(1, 'h') | |
# Drop hours that are 0 | |
df2 = df2[df2.hours > 0.0] | |
# Calcualte our mean | |
avg_hours = df2.hours.median() | |
df2['avg_hours'] = avg_hours | |
# Plot of the number of hours spent at work | |
plt.figure(figsize=(10, 8)) | |
plt.plot(df2.avg_hours, 'b') | |
plt.plot(df2.hours, 'r.') | |
plt.legend(['Average %.2f hours' % (avg_hours,)]) | |
plt.title('Total number of hours spent at work') | |
plt.xlabel('Date') | |
plt.ylabel('Time (h)') | |
plt.savefig('TotalHours.png') | |
plt.savefig('TotalHours.svg') | |
# Plot of the start/stop times | |
plt.figure(figsize=(10, 8)) | |
plt.plot(df2.start.dt.time, 'b.') | |
plt.plot(df2.stop.dt.time, 'r.') | |
plt.legend(['Start Time', 'Finish Time']) | |
plt.title('Start and Finish times') | |
plt.xlabel('Date') | |
plt.ylabel('Time') | |
plt.savefig('StartFinish.png') | |
plt.savefig('StartFinish.svg') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment