deeso · April 3, 2020 04:17
diff --git a/is_there_abuse.py b/is_there_abuse.py
 # Using pandas to look for potential token abuse in logs
 import pandas as pd
 import json
 from dateutil import parser
 print('Loading data')
 # Keys currently present in each record: clientip date date_key_day
 # date_key_hour date_key_minute date_key_month dst_host app_id src_host status
 # ait url username
 # Assumes Splunk output with one JSON object per line, where the record of
 # interest sits under each object's 'result' key.
 with open('looking-token-abuse.json', encoding='utf-8') as export_file:
     # The context manager closes the file deterministically; blank lines
     # (e.g. a trailing newline) are skipped because json.loads rejects them.
     data = [json.loads(line)['result'] for line in export_file if line.strip()]
 abusable_url = 'APP_NAME'

 # Example timestamp: '2009-09-01T23:17:01.212+0000'
 # Templates used to bucket a timestamp at minute/hour/day/month resolution.
 discretize_key_fmt_minute = "{year}-{month:02d}-{day:02d}-{hour:02d}-{minute:02d}"
 discretize_key_fmt_hour = "{year}-{month:02d}-{day:02d}-{hour:02d}"
 discretize_key_fmt_day = "{year}-{month:02d}-{day:02d}"
 # NOTE(review): unlike the templates above, the month is not zero-padded here,
 # so keys such as "2009-9" do not sort lexicographically. Left unchanged
 # because existing records may already carry keys in this format - confirm.
 discretize_key_fmt_month = "{year}-{month}"


 def build_date_key_minute(dt_x):
     """Return the minute-resolution bucket key for the datetime-like ``dt_x``."""
     return discretize_key_fmt_minute.format(
         year=dt_x.year,
         month=dt_x.month,
         day=dt_x.day,
         hour=dt_x.hour,
         minute=dt_x.minute,
     )


 def build_date_key_hour(dt_x):
     """Return the hour-resolution bucket key for the datetime-like ``dt_x``."""
     return discretize_key_fmt_hour.format(
         year=dt_x.year,
         month=dt_x.month,
         day=dt_x.day,
         hour=dt_x.hour,
     )


 def build_date_key_day(dt_x):
     """Return the day-resolution bucket key for the datetime-like ``dt_x``."""
     return discretize_key_fmt_day.format(
         year=dt_x.year,
         month=dt_x.month,
         day=dt_x.day,
     )


 def build_date_key_month(dt_x):
     """Return the month-resolution bucket key for the datetime-like ``dt_x``."""
     return discretize_key_fmt_month.format(year=dt_x.year, month=dt_x.month)


 # Important keys: status, clientip, app_id, username, date.
 # Token abuse can only happen in a 90s window.
 # aits are identifiers to track authenticated users.
 def find_token_abuse(activity_df, abusable_app, abuse_window=90, app_col='app_url'):
     """Return the first suspicious pair of consecutive requests for each ait.

     A pair is reported when a request whose ``app_col`` value equals
     ``abusable_app`` is followed, for the same ait, by the next request no
     more than ``abuse_window`` seconds later.

     Args:
         activity_df: DataFrame with an ``ait`` column, a datetime-like
             ``date`` column and the ``app_col`` column.
         abusable_app: value of ``app_col`` that marks the abusable application.
         abuse_window: maximum gap between the two requests, in seconds.
         app_col: name of the column compared against ``abusable_app``.
             NOTE(review): the key list documented where the data is loaded
             mentions ``url`` and ``app_id`` but not ``app_url`` - confirm.

     Returns:
         A list of ``[ait, app, date, next_app, next_date]`` entries, at most
         one per ait, ordered by ait.
     """
     if activity_df.empty:
         return []
     window = pd.Timedelta(seconds=abuse_window)
     hits = []
     # Stable sort so that same-timestamp rows keep their input order.
     ordered = activity_df.sort_values('date', kind='stable')
     # Grouping selects rows by exact ait equality (not regex/prefix matching);
     # rows with a missing ait are dropped by groupby.
     for ait, ait_df in ordered.groupby('ait', sort=True):
         events = list(zip(ait_df[app_col], ait_df['date']))
         for (src_app, src_date), (dst_app, dst_date) in zip(events, events[1:]):
             # Compare full durations: timedelta.seconds ignores whole days.
             if src_app == abusable_app and dst_date - src_date <= window:
                 hits.append([ait, src_app, src_date, dst_app, dst_date])
                 break  # only the first hit per ait is reported
     return hits


 abuse_window = 90  # seconds
 abusable_app = abusable_url
 app_activity_df = pd.DataFrame(data)
 if not app_activity_df.empty:
     # JSON delivers timestamps as text; parse them so they can be subtracted.
     app_activity_df['date'] = pd.to_datetime(app_activity_df['date'], utc=True)
     app_activity_df = app_activity_df.sort_values('date', ignore_index=True)
 aits_potential_abuse = find_token_abuse(app_activity_df, abusable_app, abuse_window)

 if aits_potential_abuse:
     for ait, app_url, date, target_app, target_date in aits_potential_abuse:
         print("[{}] {} {} --> {} {}".format(ait, app_url, date, target_app, target_date))
 else:
     print("No abuse detected.")
	# Using pandas to look for potential token abuse in logs
	import pandas as pd
	import json
	from dateutil import parser
	print('Loading data')
	# Keys currently present in each record: clientip date date_key_day
	# date_key_hour date_key_minute date_key_month dst_host app_id src_host status
	# ait url username
	# Assumes Splunk output with one JSON object per line, where the record of
	# interest sits under each object's 'result' key.
	with open('looking-token-abuse.json', encoding='utf-8') as export_file:
	    # The context manager closes the file deterministically; blank lines
	    # (e.g. a trailing newline) are skipped because json.loads rejects them.
	    data = [json.loads(line)['result'] for line in export_file if line.strip()]
	abusable_url = 'APP_NAME'

	# Example timestamp: '2009-09-01T23:17:01.212+0000'
	# Templates used to bucket a timestamp at minute/hour/day/month resolution.
	discretize_key_fmt_minute = "{year}-{month:02d}-{day:02d}-{hour:02d}-{minute:02d}"
	discretize_key_fmt_hour = "{year}-{month:02d}-{day:02d}-{hour:02d}"
	discretize_key_fmt_day = "{year}-{month:02d}-{day:02d}"
	# NOTE(review): unlike the templates above, the month is not zero-padded here,
	# so keys such as "2009-9" do not sort lexicographically. Left unchanged
	# because existing records may already carry keys in this format - confirm.
	discretize_key_fmt_month = "{year}-{month}"


	def build_date_key_minute(dt_x):
	    """Return the minute-resolution bucket key for the datetime-like ``dt_x``."""
	    return discretize_key_fmt_minute.format(
	        year=dt_x.year,
	        month=dt_x.month,
	        day=dt_x.day,
	        hour=dt_x.hour,
	        minute=dt_x.minute,
	    )


	def build_date_key_hour(dt_x):
	    """Return the hour-resolution bucket key for the datetime-like ``dt_x``."""
	    return discretize_key_fmt_hour.format(
	        year=dt_x.year,
	        month=dt_x.month,
	        day=dt_x.day,
	        hour=dt_x.hour,
	    )


	def build_date_key_day(dt_x):
	    """Return the day-resolution bucket key for the datetime-like ``dt_x``."""
	    return discretize_key_fmt_day.format(
	        year=dt_x.year,
	        month=dt_x.month,
	        day=dt_x.day,
	    )


	def build_date_key_month(dt_x):
	    """Return the month-resolution bucket key for the datetime-like ``dt_x``."""
	    return discretize_key_fmt_month.format(year=dt_x.year, month=dt_x.month)


	# Important keys: status, clientip, app_id, username, date.
	# Token abuse can only happen in a 90s window.
	# aits are identifiers to track authenticated users.
	def find_token_abuse(activity_df, abusable_app, abuse_window=90, app_col='app_url'):
	    """Return the first suspicious pair of consecutive requests for each ait.

	    A pair is reported when a request whose ``app_col`` value equals
	    ``abusable_app`` is followed, for the same ait, by the next request no
	    more than ``abuse_window`` seconds later.

	    Args:
	        activity_df: DataFrame with an ``ait`` column, a datetime-like
	            ``date`` column and the ``app_col`` column.
	        abusable_app: value of ``app_col`` that marks the abusable application.
	        abuse_window: maximum gap between the two requests, in seconds.
	        app_col: name of the column compared against ``abusable_app``.
	            NOTE(review): the key list documented where the data is loaded
	            mentions ``url`` and ``app_id`` but not ``app_url`` - confirm.

	    Returns:
	        A list of ``[ait, app, date, next_app, next_date]`` entries, at most
	        one per ait, ordered by ait.
	    """
	    if activity_df.empty:
	        return []
	    window = pd.Timedelta(seconds=abuse_window)
	    hits = []
	    # Stable sort so that same-timestamp rows keep their input order.
	    ordered = activity_df.sort_values('date', kind='stable')
	    # Grouping selects rows by exact ait equality (not regex/prefix matching);
	    # rows with a missing ait are dropped by groupby.
	    for ait, ait_df in ordered.groupby('ait', sort=True):
	        events = list(zip(ait_df[app_col], ait_df['date']))
	        for (src_app, src_date), (dst_app, dst_date) in zip(events, events[1:]):
	            # Compare full durations: timedelta.seconds ignores whole days.
	            if src_app == abusable_app and dst_date - src_date <= window:
	                hits.append([ait, src_app, src_date, dst_app, dst_date])
	                break  # only the first hit per ait is reported
	    return hits


	abuse_window = 90  # seconds
	abusable_app = abusable_url
	app_activity_df = pd.DataFrame(data)
	if not app_activity_df.empty:
	    # JSON delivers timestamps as text; parse them so they can be subtracted.
	    app_activity_df['date'] = pd.to_datetime(app_activity_df['date'], utc=True)
	    app_activity_df = app_activity_df.sort_values('date', ignore_index=True)
	aits_potential_abuse = find_token_abuse(app_activity_df, abusable_app, abuse_window)

	if aits_potential_abuse:
	    for ait, app_url, date, target_app, target_date in aits_potential_abuse:
	        print("[{}] {} {} --> {} {}".format(ait, app_url, date, target_app, target_date))
	else:
	    print("No abuse detected.")