Last active
July 28, 2021 04:36
-
-
Save brevityinmotion/6aecb6b9c83e3718d99a83821723b3ae to your computer and use it in GitHub Desktop.
Modular httpx script to install, load, normalize, and process output data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io, json | |
import brevitycore.core | |
def prepareHttpx(programName, inputBucketName, fileName):
    """Pick the httpx input list for a program and publish its run script.

    Args:
        programName: Program identifier used to build file and folder names.
        inputBucketName: Bucket name forwarded to ``generateScriptHttpx``.
        fileName: Name of the file that triggered this step. When it equals
            ``<programName>-domains-new.csv`` the run is labelled ``initial``
            and reads ``<programName>-domains-all.csv``; any other value is
            labelled ``crawl`` and reads ``<programName>-urls-mod.txt``.

    Returns:
        Whatever ``generateScriptHttpx`` returns for the generated script.
    """
    inputDir = '$HOME/security/inputs/' + programName + '/'
    # If operation is initial, it will be domains-new as filename
    diffPath = programName + '-domains-new.csv'

    if fileName == diffPath:
        inputPath = inputDir + programName + '-domains-all.csv'
        outputPath = 'initial'
    else:
        # Anything other than initial defaults to the crawl input. Making this
        # an explicit else keeps inputPath/outputPath bound for unexpected
        # file names instead of failing with UnboundLocalError.
        inputPath = inputDir + programName + '-urls-mod.txt'
        outputPath = 'crawl'

    return generateScriptHttpx(programName, inputBucketName, inputPath, outputPath)
def generateScriptHttpx(programName, inputBucketName, inputPath, outputPath):
    """Render the per-program httpx bash script and upload it.

    The script is uploaded as ``run/<programName>/httpx-<programName>.sh``
    through ``brevitycore.core.upload_object``.

    Args:
        programName: Program identifier interpolated into paths and file names.
        inputBucketName: Bucket that receives the generated script.
        inputPath: Path (as seen by the shell running the script) of the list
            passed to ``httpx -l``.
        outputPath: Label embedded in the JSON output file name
            (``<programName>-httpx-<outputPath>.json``).

    Returns:
        The value returned by ``brevitycore.core.upload_object``.
    """
    # `mkdir -p` is used so the script works when parent folders are missing
    # and does not error on re-runs. The per-program httpx-json folder is
    # created explicitly because the httpx `-o` target below lives inside it.
    fileContents = f"""#!/bin/bash
# Run custom httpx script
export HOME=/root
export PATH=/root/go/bin:$PATH
mkdir -p $HOME/security/raw/{programName}/responses
mkdir -p $HOME/security/raw/{programName}/httpx
mkdir -p $HOME/security/presentation
mkdir -p $HOME/security/presentation/httpx-json
mkdir -p $HOME/security/presentation/{programName}/httpx-json
export HTTPXINPUTPATH={inputPath}
if [ -f "$HTTPXINPUTPATH" ]; then
httpx -json -o $HOME/security/presentation/{programName}/httpx-json/{programName}-httpx-{outputPath}.json -l {inputPath} -status-code -title -location -content-type -web-server -no-color -tls-probe -x GET -ip -cname -cdn -content-length -sr -srd $HOME/security/raw/{programName}/responses -timeout 1
fi
sleep 10
# Remove .txt from all of the files
echo 'Completed crawl'
cd $HOME/security/raw/{programName}/responses/
for f in *.txt; do mv -- "$f" "${{f%.txt}}"; done
sleep 10
cd /root/security/raw/{programName}/
# tar -cvzf {programName}-responses.tar.gz responses
sleep 10
# mv $HOME/security/raw/{programName}/{programName}-responses.tar.gz $HOME/security/refined/{programName}/{programName}-responses.tar.gz
# rm -r responses
# sleep 10
#aws s3 cp $HOME/security/raw/{programName}/responses/ s3://brevity-raw/responses/{programName}/
#aws s3 cp $HOME/security/raw/{programName}/httpx/ s3://brevity-raw/httpx/{programName}/
sh $HOME/security/run/{programName}/sync-{programName}.sh
wait
sh $HOME/security/run/{programName}/stepfunctions-{programName}.sh"""

    # Upload file to S3
    object_name = 'httpx-' + programName + '.sh'
    object_path = 'run/' + programName + '/' + object_name

    # The context manager releases the buffer even if the upload raises.
    with io.BytesIO(fileContents.encode()) as objectBuffer:
        status = brevitycore.core.upload_object(objectBuffer, inputBucketName, object_path)
    return status
def generateInstallScriptHttpx(inputBucketName):
    """Render the httpx host bootstrap script and upload it.

    The script creates the ``$HOME/security`` folder tree, installs git,
    python3, pip, golang and the AWS CLI through apt-get, and fetches httpx
    with ``go get``. It is uploaded as ``config/bounty-startup-httpx.sh``
    through ``brevitycore.core.upload_object``.

    Args:
        inputBucketName: Bucket that receives the generated script.

    Returns:
        The value returned by ``brevitycore.core.upload_object``.
    """
    # Load AWS access keys for s3 synchronization
    secretName = 'brevity-aws-recon'
    regionName = 'us-east-1'
    secretRetrieved = brevitycore.core.get_secret(secretName, regionName)
    secretjson = json.loads(secretRetrieved)
    # NOTE(review): these two values are not referenced by the script text
    # below. Confirm whether the script should configure the AWS CLI with
    # them or whether this secret lookup can be dropped.
    awsAccessKeyId = secretjson['AWS_ACCESS_KEY_ID']
    awsSecretKey = secretjson['AWS_SECRET_ACCESS_KEY']

    # `mkdir -p` keeps the installer re-runnable on a host where the folders
    # already exist.
    fileContents = """#!/bin/bash
# Create directory structure
export HOME=/root
mkdir -p $HOME/security
mkdir -p $HOME/security/tools
mkdir -p $HOME/security/tools/amass
mkdir -p $HOME/security/tools/amass/db
mkdir -p $HOME/security/tools/hakrawler
mkdir -p $HOME/security/tools/httpx
mkdir -p $HOME/security/raw
mkdir -p $HOME/security/refined
mkdir -p $HOME/security/presentation
mkdir -p $HOME/security/presentation/httpx
mkdir -p $HOME/security/presentation/httpx-json
mkdir -p $HOME/security/scope
mkdir -p $HOME/security/install
mkdir -p $HOME/security/config
mkdir -p $HOME/security/run
mkdir -p $HOME/security/inputs
# Update apt repositories to avoid software installation issues
apt-get update
# Ensure OS and packages are fully upgraded
#apt-get -y upgrade
# Install Git
apt-get install -y git # May already be installed
# Install Python and Pip
apt-get install -y python3 # Likely is already installed
apt-get install -y python3-pip
# Install Golang via cli
apt-get install -y golang
echo 'export GOROOT=/usr/lib/go' >> ~/.bashrc
echo 'export GOPATH=$HOME/go' >> ~/.bashrc
echo 'export PATH=$GOPATH/bin:$GOROOT/bin:$PATH' >> ~/.bashrc
#source ~/.bashrc
# Install aws cli
apt-get install -y awscli
# Install go tools
GO111MODULE=on go get -v github.com/projectdiscovery/httpx/cmd/httpx"""

    # Upload file to S3
    object_name = 'bounty-startup-httpx.sh'
    object_path = 'config/' + object_name
    bucket = inputBucketName

    # The context manager releases the buffer even if the upload raises.
    with io.BytesIO(fileContents.encode()) as objectBuffer:
        installScriptStatus = brevitycore.core.upload_object(objectBuffer, bucket, object_path)
    return installScriptStatus
def processHttpx(programName, refinedBucketPath, inputBucketPath, presentationBucketPath, operationName, programInputBucketPath):
    """Normalize one httpx JSON-lines result and merge it into the program output.

    Reads ``<presentationBucketPath>httpx-json/<programName>-httpx-<operationName>.json``,
    tags each row with the program name, derives ``domain`` and ``baseurl``
    from ``url``, merges the rows into
    ``<presentationBucketPath>httpx/<programName>-httpx.json`` (the newest row
    wins per URL) and writes that file back. For the ``initial`` operation it
    also exports CSV listings under ``programInputBucketPath`` and
    ``inputBucketPath``.

    Args:
        programName: Program identifier used in file names and the ``program`` column.
        refinedBucketPath: Not used by this function; kept for caller compatibility.
        inputBucketPath: Path prefix that receives ``<programName>-httpx-crawl.csv``.
        presentationBucketPath: Path prefix holding ``httpx-json/`` and ``httpx/``.
        operationName: Operation label; ``'initial'`` enables the CSV exports.
        programInputBucketPath: Path prefix that receives the per-program CSV files.

    Returns:
        The string ``'Success'``.
    """
    # Imported locally because the module-level imports do not provide pandas.
    import pandas as pd
    from urllib.parse import urlparse

    def _parseUrlRoot(urlvalue):
        # Host (and port, if any) portion of the URL.
        return urlparse(urlvalue).netloc

    def _parseUrlBase(urlvalue):
        # scheme://host/path with query string and fragment dropped.
        parts = urlparse(urlvalue)
        return parts.scheme + '://' + parts.netloc + parts.path

    fileName = programName + '-httpx-' + operationName + '.json'
    presentationFilePath = presentationBucketPath + 'httpx-json/' + fileName
    df = pd.read_json(presentationFilePath, lines=True)
    df['program'] = programName

    if operationName == 'initial':
        # NOTE(review): sep='\n' is kept from the original; newer Python csv
        # versions may reject a newline delimiter - confirm on the runtime in use.
        storePathUrl = programInputBucketPath + programName + '/' + programName + '-httpx.csv'
        df.to_csv(storePathUrl, header=False, index=False, sep='\n')
        # NOTE(review): the urls-base export is treated as part of the initial
        # operation; confirm it is not also expected for crawl runs.
        storePathUrl = programInputBucketPath + programName + '/' + programName + '-urls-base.csv'
        dfUrls = df.drop_duplicates(subset=['url'])
        dfUrls['url'].to_csv(storePathUrl, header=False, index=False, sep='\n')

    df['domain'] = df['url'].apply(_parseUrlRoot)
    df['baseurl'] = df['url'].apply(_parseUrlBase)

    fileOutputName = programName + '-httpx.json'
    outputPath = presentationBucketPath + 'httpx/' + fileOutputName

    # Check if there is already output so that it is not overwritten.
    # Only the read is best-effort: a failure while merging must not be
    # mistaken for "no previous output", which would discard earlier results.
    try:
        dfInitialHttpx = pd.read_json(outputPath, lines=True)
    except Exception:
        print('No initial httpx output')
    else:
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        df = pd.concat([dfInitialHttpx, df])
        df = df.drop_duplicates(subset=['url'], keep='last')

    df.to_json(outputPath, orient='records', lines=True)

    if operationName == 'initial':
        # The URL list goes to the -httpx-crawl.csv name; the original built
        # this name but then wrote to the .json output name instead.
        fileOutputCrawl = programName + '-httpx-crawl.csv'
        storePathUrl = inputBucketPath + programName + '/' + fileOutputCrawl
        df['url'].to_csv(storePathUrl, header=False, index=False, sep='\n')

    return 'Success'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment