Skip to content

Instantly share code, notes, and snippets.

@brevityinmotion
Created July 28, 2021 03:27
Show Gist options
  • Save brevityinmotion/25f20271e682a67e90022eef72438a16 to your computer and use it in GitHub Desktop.
Save brevityinmotion/25f20271e682a67e90022eef72438a16 to your computer and use it in GitHub Desktop.
Lambda function to parse and normalize scope data
import ast
import re
import urllib.request, json
from urllib.parse import urlparse
def parseScopeIn(scopeIn):
targetData = []
if not scopeIn:
return targetData
smallAll = str(scopeIn)[1:-1]
scopeLength = len(scopeIn)
smallData = ast.literal_eval(smallAll)
if (scopeLength > 1):
for item in smallData:
if item.get('target') is not None:
targetData.append(item.get('target'))
if item.get('asset_identifier') is not None:
targetData.append(item.get('asset_identifier'))
return targetData
else:
if smallData.get('target') is not None:
targetData.append(smallData.get('target'))
if smallData.get('asset_identifier') is not None:
targetData.append(smallData.get('asset_identifier'))
return targetData
def parseScopeOut(scopeOut):
targetData = []
if not scopeOut:
targetData.append('icicles.io')
return targetData
smallAll = str(scopeOut)[1:-1]
scopeLength = len(scopeOut)
smallData = ast.literal_eval(smallAll)
if (scopeLength > 1):
for item in smallData:
if item.get('target') is not None:
targetData.append(item.get('target'))
if item.get('asset_identifier') is not None:
targetData.append(item.get('asset_identifier'))
return targetData
else:
if smallData.get('target') is not None:
targetData.append(smallData.get('target'))
if smallData.get('asset_identifier') is not None:
targetData.append(smallData.get('asset_identifier'))
return targetData
def parseProgramUrl(programUrl):
programName = programUrl.rsplit('/', 1)[-1]
programName = ''.join(e for e in programName if e.isalnum())
return programName
def parseProgramName(programName):
programName = ''.join(e for e in programName if e.isalnum())
return programName
def cleanScope(sub, test_str):
for ele in sub:
if ele in test_str:
return 1
return 0
def cleanupScopeGithub(dfIn):
matchString = 'github.com'
matches = []
for match in dfIn:
#if matchString in match:
match = re.search("(?P<url>https?://[^\s]+)", match)#.group("url")
if match is None:
continue
else:
match = match.group('url')
if matchString in match:
matches.append(match)
else:
continue
return matches
def cleanupScopeStrict(dfIn):
matchString = 'github.com'
matches = []
for match in dfIn:
#if matchString in match:
match = re.search("(?P<url>https?://[^\s]+)", match)#.group("url")
if match is None:
continue
else:
match = match.group('url')
if matchString in match:
continue
else:
matches.append(match)
return matches
def cleanupScopeWild(dfIn):
matchString = '\*.'
matches = []
for match in dfIn:
match = re.search("(?P<url>[*][^\s|\,]+)", match)
#match = re.search("(?P<url>[*][^\s]+)", match)
if match is None:
continue
else:
match = match.group('url')
#if matchString in match:
matches.append(match)
#else:
# continue
return matches
def cleanupScopeIP(dfIn):
matchString = '\*.'
matches = []
for match in dfIn:
# This regex will only identify IP addresses and cuts off CIDR extensions
#match = re.search("(?P<url>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})", match)
match = re.search("(?P<url>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(?:/\d{1,2}|))", match)
if match is None:
continue
else:
match = match.group('url')
matches.append(match)
return matches
def cleanupScopeGeneral(dfIn):
matchStringSpace = ' '
matchStringGit = 'github.com'
matchStringUrl = 'http'
matchStringWild = '\*.'
matchStringDot = '.'
matches = []
for match in dfIn:
matchWild = re.search("(?P<url>[*][^\s|\,]+)", match)
matchDot = re.search("(?P<url>[.][^\s]+)", match)
matchIP = re.search("(?P<url>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(?:/\d{1,2}|))", match)
if matchStringGit in match:
continue
elif matchStringSpace in match:
continue
elif matchStringUrl in match:
url = urlparse(match)
match = url.netloc
matches.append(match)
continue
elif matchIP is not None:
continue
elif matchWild is not None:
match = match.replace('*.','')
matches.append(match)
continue
elif matchDot is None:
continue
else:
matches.append(match)
return matches
def extrapolateScope(programName, listscopein, listscopeout):
ScopeInURLs = cleanupScopeStrict(listscopein)
ScopeInGithub = cleanupScopeGithub(listscopein)
ScopeInWild = cleanupScopeWild(listscopein)
ScopeInGeneral = cleanupScopeGeneral(listscopein)
ScopeInIP = cleanupScopeIP(listscopein)
ScopeOutURLs = cleanupScopeStrict(listscopeout)
ScopeOutGithub = cleanupScopeGithub(listscopeout)
ScopeOutWild = cleanupScopeWild(listscopeout)
ScopeOutGeneral = cleanupScopeGeneral(listscopeout)
ScopeOutIP = cleanupScopeIP(listscopeout)
return ScopeInURLs, ScopeInGithub, ScopeInWild, ScopeInGeneral, ScopeInIP, ScopeOutURLs, ScopeOutGithub, ScopeOutWild, ScopeOutGeneral, ScopeOutIP
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment