Created
April 3, 2020 04:17
-
-
Save deeso/684df2276b344a0f919a6f473399028e to your computer and use it in GitHub Desktop.
A small recipe to help with analyzing timestamped logs using pandas. It tries to detect whether any abuse happens within a 90-second window after a user has authenticated to the target application.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Using pandas to look for potential token abuse in logs
import pandas as pd | |
import json | |
from dateutil import parser | |
print('Loading data')
# keys in data now: clientip date date_key_day date_key_hour date_key_minute date_key_month dst_host app_id src_host status ait url username
# The input is read as one JSON object per line; the record of interest is
# stored under each object's 'result' key (presumably a Splunk export --
# confirm against the exporting query).
with open('looking-token-abuse.json', encoding='utf-8') as results_file:
    # The context manager closes the file even if a line fails to parse.
    # Blank lines (e.g. a trailing newline at end of file) are skipped so they
    # do not make json.loads raise.
    data = [json.loads(line)['result'] for line in results_file if line.strip()]
# Value that identifies the application whose tokens could be abused; it is
# compared against each row's app_url by the detection code.
abusable_url = 'APP_NAME'
# Sample timestamp (presumably the shape of the `date` values -- confirm):
# '2009-09-01T23:17:01.212+0000'
#
# Templates for bucketing a datetime at a given resolution. Every field except
# the month in the month-level key is zero-padded to two digits.
discretize_key_fmt_minute = "{year}-{month:02d}-{day:02d}-{hour:02d}-{minute:02d}"
discretize_key_fmt_hour = "{year}-{month:02d}-{day:02d}-{hour:02d}"
discretize_key_fmt_day = "{year}-{month:02d}-{day:02d}"
discretize_key_fmt_month = "{year}-{month}"


def build_date_key_minute(dt_x):
    """Return the minute-resolution key for dt_x, e.g. '2009-09-01-23-17'."""
    return discretize_key_fmt_minute.format(
        year=dt_x.year,
        month=dt_x.month,
        day=dt_x.day,
        hour=dt_x.hour,
        minute=dt_x.minute,
    )


def build_date_key_hour(dt_x):
    """Return the hour-resolution key for dt_x, e.g. '2009-09-01-23'."""
    return discretize_key_fmt_hour.format(
        year=dt_x.year,
        month=dt_x.month,
        day=dt_x.day,
        hour=dt_x.hour,
    )


def build_date_key_day(dt_x):
    """Return the day-resolution key for dt_x, e.g. '2009-09-01'."""
    return discretize_key_fmt_day.format(
        year=dt_x.year,
        month=dt_x.month,
        day=dt_x.day,
    )


def build_date_key_month(dt_x):
    """Return the month-resolution key for dt_x, e.g. '2009-9' (month unpadded)."""
    return discretize_key_fmt_month.format(year=dt_x.year, month=dt_x.month)
# important keys: status, clientip, app_id username, date
# token abuse can only happen in a 90s window
# aits are identifiers to track authenticated users


def find_potential_abuse(activity_df, abusable_app, abuse_window=90):
    """Find, per ait, the first request that closely follows a hit on abusable_app.

    For each distinct ``ait`` value the rows are walked in chronological order
    as consecutive (previous, next) pairs. A pair is flagged when the previous
    row's ``app_url`` equals ``abusable_app`` and the next row occurred at most
    ``abuse_window`` seconds later. Only the first such pair per ait is kept.

    Args:
        activity_df: DataFrame with at least the columns ``ait``, ``app_url``
            and ``date``; ``date`` must hold datetime values so that
            subtraction yields a timedelta.
            NOTE(review): the key list documented near the data-loading code
            mentions ``url`` rather than ``app_url`` -- confirm the column name.
        abusable_app: value of ``app_url`` that marks the sensitive application.
        abuse_window: maximum gap, in seconds, between the two requests.

    Returns:
        A list of ``[ait, app_url, date, next_app_url, next_date]`` entries,
        ordered by ait. Empty when nothing is flagged or the frame is empty.
    """
    if activity_df.empty:
        return []

    findings = []
    # groupby compares ait values for exact equality (no regex/prefix matching),
    # iterates the groups in sorted key order and leaves out rows whose ait is
    # missing.
    for ait, ait_df in activity_df.groupby('ait'):
        # mergesort is stable, so rows with identical timestamps keep log order.
        ordered = ait_df.sort_values('date', kind='mergesort')
        rows = list(ordered[['app_url', 'date']].itertuples(index=False))
        # With fewer than two rows the zip is empty and the ait is skipped.
        for previous, row in zip(rows, rows[1:]):
            if previous.app_url != abusable_app:
                continue
            # total_seconds() covers the whole gap; timedelta.seconds would
            # drop the days component and flag requests that are days apart.
            if (row.date - previous.date).total_seconds() <= abuse_window:
                findings.append(
                    [ait, previous.app_url, previous.date, row.app_url, row.date]
                )
                break
    return findings


app_activity_df = pd.DataFrame(data)
if app_activity_df.empty:
    # No records loaded: nothing to parse or sort, and no aits to inspect.
    aits = []
else:
    # The dates arrive as strings from JSON; parse them (normalised to UTC) so
    # that sorting is chronological and differences are real timedeltas.
    app_activity_df['date'] = pd.to_datetime(app_activity_df['date'], utc=True)
    app_activity_df = app_activity_df.sort_values(
        'date', kind='mergesort', ignore_index=True
    )
    aits = sorted(app_activity_df['ait'].dropna().unique())

abuse_window = 90  # seconds
abusable_app = abusable_url
aits_potential_abuse = find_potential_abuse(
    app_activity_df, abusable_app, abuse_window
)
# The original script created the list under this (differently spelled) name;
# keep it bound to the same list for anything that still refers to it.
ait_potential_abuse = aits_potential_abuse
# Print one line per flagged ait: the request to the abusable application and
# the request that followed it inside the abuse window.
if aits_potential_abuse:
    for ait, app_url, date, target_app, target_date in aits_potential_abuse:
        # The template has five placeholders, so all five fields are passed;
        # leaving target_date out makes str.format raise IndexError.
        print("[{}] {} {} --> {} {}".format(ait, app_url, date, target_app, target_date))
else:
    print("No abuse detected.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment