Last active
February 21, 2020 14:19
-
-
Save vmonaco/766cf8b1ab399f7dd7a4 to your computer and use it in GitHub Desktop.
CMU keystroke power law
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Created on May 26, 2015

@author: vinnie, [email protected]

Power-law results from:

    "DATA FORENSIC TECHNIQUES USING BENFORD’S LAW AND ZIPF’S LAW FOR KEYSTROKE
    DYNAMICS", Aamo Iorliam, Anthony T.S. Ho, Norman Poh, Santosh Tirunagari,
    and Patrick Bours. IWBF 2015.

Uses the data from:

    "Comparing Anomaly-Detection Algorithms for Keystroke Dynamics,"
    Kevin Killourhy and Roy Maxion. DSN 2009. http://www.cs.cmu.edu/~keystroke/

Requires numpy, pandas, powerlaw, and matplotlib. Run the script as:

    $ python cmu_powerlaw.py
'''
import numpy as np | |
import pandas as pd | |
import powerlaw as pl | |
import matplotlib.pyplot as plt | |
# Value passed as ``fit_method`` to powerlaw.Fit; can also be 'Likelihood'.
FIT_METHOD = 'KS'
DATA_URL = 'http://www.cs.cmu.edu/~keystroke/DSL-StrongPasswordData.csv'

# x_min passed to each fit (up-down, down-down, duration).
# Leave these as None to have x_min estimated during the fit.
UD_XMIN, DD_XMIN, DUR_XMIN = None, None, None

# Uncomment the line below to use fixed x_min values instead of estimating
# them. NOTE: when x_min is estimated, this script arrives at 0.1852 for the
# duration feature, not the 0.1818 listed here.
# UD_XMIN, DD_XMIN, DUR_XMIN = 0.9801, 0.9880, 0.1818

print('Downloading data from', DATA_URL)
# The first 3 columns (subject, session, repetition) become the index
df = pd.read_csv(DATA_URL, index_col=[0, 1, 2])


def get_feature_cols(feat):
    '''Return the column names of ``df`` that start with ``feat``.

    Columns are labeled like: feature_type.keyname[.secondkeyname]
    '''
    return [c for c in df.columns if c.startswith(feat)]


# Pool every column of a feature type into one flat array of samples
ud = df[get_feature_cols('UD')].values.flatten()  # up-down latency
dd = df[get_feature_cols('DD')].values.flatten()  # down-down latency
dur = df[get_feature_cols('H')].values.flatten()  # hold time

# UD can be negative, so use abs values
ud = np.abs(ud)

print('Fitting models, may take a while...')
fit_ud = pl.Fit(ud, fit_method=FIT_METHOD, xmin=UD_XMIN)
fit_dd = pl.Fit(dd, fit_method=FIT_METHOD, xmin=DD_XMIN)
fit_dur = pl.Fit(dur, fit_method=FIT_METHOD, xmin=DUR_XMIN)


def summarize(fit):
    '''Return a three-line text summary of a fitted power law.

    The lines report ``fit.power_law.xmin``, ``fit.power_law.alpha`` and L,
    the sum of ``fit.power_law.loglikelihoods(fit.data)``, each formatted
    with four decimal places.
    '''
    return 'x_min = %.4f\nalpha = %.4f\nL = %.4f' % (
        fit.power_law.xmin,
        fit.power_law.alpha,
        fit.power_law.loglikelihoods(fit.data).sum(),
    )


print('Up-down\n', summarize(fit_ud), sep='')
print('Down-down\n', summarize(fit_dd), sep='')
print('Duration\n', summarize(fit_dur), sep='')
# Helper to make a nice plot | |
def make_subplot(name, fit, ax, visiblex=False):
    '''Draw one annotated CCDF panel on ``ax``.

    Plots the CCDF of ``fit`` in black and the CCDF of ``fit.power_law`` as
    a red dashed line, then writes ``name`` followed by ``summarize(fit)``
    near the top-right corner of the axes.

    name     -- label shown as the first line of the annotation
    fit      -- fitted object providing ``plot_ccdf`` and ``power_law``
    ax       -- axes to draw on
    visiblex -- whether the x tick labels of ``ax`` are shown
    '''
    # Toggle the tick labels of the axes passed in, not of a global axes,
    # so each panel controls its own labels.
    plt.setp(ax.get_xticklabels(), visible=visiblex)
    # Draw on ``ax`` explicitly rather than relying on pyplot's current axes
    fit.plot_ccdf(ax=ax, color='k')
    fit.power_law.plot_ccdf(ax=ax, color='r', linestyle='--')
    # (0.9, 0.9) is in axes-fraction coordinates because of transAxes
    ax.text(0.9, 0.9, '%s\n%s' % (name, summarize(fit)),
            ha='right', va='top', transform=ax.transAxes)
# Build a 6x9 figure with three stacked panels, one per feature type
plt.figure(figsize=(6, 9))

# Panel 1 of 3: up-down latency; this axes also carries the title
ax1 = plt.subplot(3, 1, 1)
ax1.set_title('CMU keystroke power laws')
make_subplot('Up-down', fit_ud, ax1)

# Panel 2 of 3: down-down latency, sharing its x axis with the first panel
ax2 = plt.subplot(3, 1, 2, sharex=ax1)
make_subplot('Down-down', fit_dd, ax2)

# Panel 3 of 3: hold duration, also sharing the x axis with the first panel;
# the only call that passes visiblex=True
ax3 = plt.subplot(3, 1, 3, sharex=ax1)
make_subplot('Duration', fit_dur, ax3, visiblex=True)

plt.tight_layout()
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment