Skip to content

Instantly share code, notes, and snippets.

@brevityinmotion
Last active July 28, 2021 04:36
Show Gist options
  • Save brevityinmotion/6aecb6b9c83e3718d99a83821723b3ae to your computer and use it in GitHub Desktop.
Save brevityinmotion/6aecb6b9c83e3718d99a83821723b3ae to your computer and use it in GitHub Desktop.
Modular httpx script to install, load, normalize, and process output data
import io, json
import brevitycore.core
def prepareHttpx(programName, inputBucketName, fileName):
    """Pick the httpx input file and run mode, then generate the run script.

    Args:
        programName: Program identifier used in file names and paths.
        inputBucketName: Bucket name forwarded to ``generateScriptHttpx``.
        fileName: Name of the file that triggered this step. When it equals
            ``<programName>-domains-new.csv`` the run is an ``'initial'``
            run over ``<programName>-domains-all.csv``; any other value is
            handled as a ``'crawl'`` run over ``<programName>-urls-mod.txt``.

    Returns:
        Whatever ``generateScriptHttpx`` returns for the generated script.
    """
    inputDir = '$HOME/security/inputs/' + programName + '/'
    # If operation is initial, the trigger file is domains-new
    diffPath = programName + '-domains-new.csv'
    if fileName == diffPath:
        inputPath = inputDir + programName + '-domains-all.csv'
        outputPath = 'initial'
    else:
        # Anything other than initial defaults to the crawl settings. The
        # previous pair of independent ifs left inputPath/outputPath unbound
        # (UnboundLocalError) for any unrecognised file name.
        inputPath = inputDir + programName + '-urls-mod.txt'
        outputPath = 'crawl'
    return generateScriptHttpx(programName, inputBucketName, inputPath, outputPath)
def generateScriptHttpx(programName, inputBucketName, inputPath, outputPath):
    """Render the per-program httpx run script and upload it.

    The rendered bash script creates the working directories, runs httpx over
    ``inputPath`` when that file exists, strips the ``.txt`` suffix from the
    stored responses, and then invokes the program's sync and stepfunctions
    scripts. It is uploaded as ``run/<programName>/httpx-<programName>.sh``.

    Args:
        programName: Program identifier substituted into paths and file names.
        inputBucketName: Bucket passed to ``brevitycore.core.upload_object``.
        inputPath: Path of the httpx target list as seen by the script
            (may contain ``$HOME``, which the shell expands).
        outputPath: Suffix used in the httpx JSON output file name.

    Returns:
        Whatever ``brevitycore.core.upload_object`` returns.
    """
    # `mkdir -p` keeps the script re-runnable and creates missing parents; the
    # directory httpx writes its JSON into is now created explicitly as well.
    fileContents = f"""#!/bin/bash
# Run custom httpx script
export HOME=/root
export PATH=/root/go/bin:$PATH
mkdir -p $HOME/security/raw/{programName}/responses
mkdir -p $HOME/security/raw/{programName}/httpx
mkdir -p $HOME/security/presentation
mkdir -p $HOME/security/presentation/httpx-json
mkdir -p $HOME/security/presentation/{programName}/httpx-json
export HTTPXINPUTPATH={inputPath}
if [ -f "$HTTPXINPUTPATH" ]; then
httpx -json -o $HOME/security/presentation/{programName}/httpx-json/{programName}-httpx-{outputPath}.json -l {inputPath} -status-code -title -location -content-type -web-server -no-color -tls-probe -x GET -ip -cname -cdn -content-length -sr -srd $HOME/security/raw/{programName}/responses -timeout 1
fi
sleep 10
# Remove .txt from all of the files
echo 'Completed crawl'
cd $HOME/security/raw/{programName}/responses/
for f in *.txt; do mv -- "$f" "${{f%.txt}}"; done
sleep 10
cd /root/security/raw/{programName}/
# tar -cvzf {programName}-responses.tar.gz responses
sleep 10
# mv $HOME/security/raw/{programName}/{programName}-responses.tar.gz $HOME/security/refined/{programName}/{programName}-responses.tar.gz
# rm -r responses
# sleep 10
#aws s3 cp $HOME/security/raw/{programName}/responses/ s3://brevity-raw/responses/{programName}/
#aws s3 cp $HOME/security/raw/{programName}/httpx/ s3://brevity-raw/httpx/{programName}/
sh $HOME/security/run/{programName}/sync-{programName}.sh
wait
sh $HOME/security/run/{programName}/stepfunctions-{programName}.sh"""

    # Upload file to S3
    object_name = 'httpx-' + programName + '.sh'
    object_path = 'run/' + programName + '/' + object_name
    # The context manager closes the buffer even if the upload raises.
    with io.BytesIO(fileContents.encode()) as objectBuffer:
        status = brevitycore.core.upload_object(objectBuffer, inputBucketName, object_path)
    return status
def generateInstallScriptHttpx(inputBucketName):
    """Render the httpx host bootstrap script and upload it.

    The rendered bash script creates the ``$HOME/security`` directory tree,
    installs git, Python 3, pip, Go and the AWS CLI through apt, appends the
    Go environment variables to ``~/.bashrc`` and installs httpx with
    ``go get``. It is uploaded as ``config/bounty-startup-httpx.sh``.

    The AWS access keys that were previously fetched from the secret store
    are no longer retrieved: the script never referenced them, so the lookup
    only added a failure mode and pulled credentials into memory for nothing.

    Args:
        inputBucketName: Bucket passed to ``brevitycore.core.upload_object``.

    Returns:
        Whatever ``brevitycore.core.upload_object`` returns.
    """
    # `mkdir -p` makes the directory setup idempotent, so re-running the
    # bootstrap on an already provisioned host does not emit errors.
    fileContents = """#!/bin/bash
# Create directory structure
export HOME=/root
mkdir -p $HOME/security
mkdir -p $HOME/security/tools
mkdir -p $HOME/security/tools/amass
mkdir -p $HOME/security/tools/amass/db
mkdir -p $HOME/security/tools/hakrawler
mkdir -p $HOME/security/tools/httpx
mkdir -p $HOME/security/raw
mkdir -p $HOME/security/refined
mkdir -p $HOME/security/presentation
mkdir -p $HOME/security/presentation/httpx
mkdir -p $HOME/security/presentation/httpx-json
mkdir -p $HOME/security/scope
mkdir -p $HOME/security/install
mkdir -p $HOME/security/config
mkdir -p $HOME/security/run
mkdir -p $HOME/security/inputs
# Update apt repositories to avoid software installation issues
apt-get update
# Ensure OS and packages are fully upgraded
#apt-get -y upgrade
# Install Git
apt-get install -y git # May already be installed
# Install Python and Pip
apt-get install -y python3 # Likely is already installed
apt-get install -y python3-pip
# Install Golang via cli
apt-get install -y golang
echo 'export GOROOT=/usr/lib/go' >> ~/.bashrc
echo 'export GOPATH=$HOME/go' >> ~/.bashrc
echo 'export PATH=$GOPATH/bin:$GOROOT/bin:$PATH' >> ~/.bashrc
#source ~/.bashrc
# Install aws cli
apt-get install -y awscli
# Install go tools
GO111MODULE=on go get -v github.com/projectdiscovery/httpx/cmd/httpx"""

    # Upload file to S3
    object_name = 'bounty-startup-httpx.sh'
    object_path = 'config/' + object_name
    # The context manager closes the buffer even if the upload raises.
    with io.BytesIO(fileContents.encode()) as objectBuffer:
        installScriptStatus = brevitycore.core.upload_object(objectBuffer, inputBucketName, object_path)
    return installScriptStatus
def processHttpx(programName, refinedBucketPath, inputBucketPath, presentationBucketPath, operationName, programInputBucketPath):
    """Normalize one httpx JSON-lines result file and merge it into the program output.

    Reads ``<presentationBucketPath>httpx-json/<programName>-httpx-<operationName>.json``,
    tags every row with the program name, derives ``domain`` and ``baseurl``
    columns from ``url``, and merges the rows into
    ``<presentationBucketPath>httpx/<programName>-httpx.json``, keeping the
    most recent row per URL. For the ``'initial'`` operation it additionally
    writes URL list files under the two input path prefixes.

    All path prefixes are concatenated as-is, so each must already end with
    a path separator.

    Args:
        programName: Program identifier used in file names and row tagging.
        refinedBucketPath: Unused; kept so existing callers keep working.
        inputBucketPath: Prefix that receives ``<programName>-httpx-crawl.csv``
            on an initial run.
        presentationBucketPath: Prefix holding the ``httpx-json/`` input and
            the ``httpx/`` merged output.
        operationName: Run mode; ``'initial'`` enables the extra exports.
        programInputBucketPath: Prefix that receives ``<programName>-httpx.csv``
            and ``<programName>-urls-base.csv`` on an initial run.

    Returns:
        The string ``'Success'``.
    """
    # Imported locally because the module-level imports do not provide `pd`.
    import pandas as pd
    from urllib.parse import urlparse

    def _parseUrlRoot(urlvalue):
        # Host (and port, if any) portion of the URL.
        return urlparse(urlvalue).netloc

    def _parseUrlBase(urlvalue):
        # URL with query string and fragment removed.
        parts = urlparse(urlvalue)
        return parts.scheme + '://' + parts.netloc + parts.path

    fileName = programName + '-httpx-' + operationName + '.json'
    presentationFilePath = presentationBucketPath + 'httpx-json/' + fileName
    df = pd.read_json(presentationFilePath, lines=True)
    df['program'] = programName

    if operationName == 'initial':
        # NOTE(review): sep='\n' writes every field on its own line; confirm
        # that layout is intended for this multi-column export and that the
        # runtime's csv module accepts a newline delimiter.
        storePathUrl = programInputBucketPath + programName + '/' + programName + '-httpx.csv'
        df.to_csv(storePathUrl, header=False, index=False, sep='\n')
        storePathUrl = programInputBucketPath + programName + '/' + programName + '-urls-base.csv'
        dfUrls = df.drop_duplicates(subset=['url'])
        dfUrls['url'].to_csv(storePathUrl, header=False, index=False, sep='\n')

    df['domain'] = df['url'].apply(_parseUrlRoot)
    df['baseurl'] = df['url'].apply(_parseUrlBase)

    fileOutputName = programName + '-httpx.json'
    outputPath = presentationBucketPath + 'httpx/' + fileOutputName
    # Check if there is already output so that it is not overwritten. Only the
    # read is guarded: a missing or unreadable file means "nothing to merge",
    # while any error in the merge itself must surface instead of silently
    # discarding the earlier results.
    try:
        dfInitialHttpx = pd.read_json(outputPath, lines=True)
    except (OSError, ValueError):
        print('No initial httpx output')
    else:
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        df = pd.concat([dfInitialHttpx, df])
        df = df.drop_duplicates(subset=['url'], keep='last')
    df.to_json(outputPath, orient='records', lines=True)

    if operationName == 'initial':
        # The crawl list previously went to fileOutputName ('-httpx.json'),
        # leaving fileOutputCrawl unused and a CSV stored under a .json name.
        fileOutputCrawl = programName + '-httpx-crawl.csv'
        storePathUrl = inputBucketPath + programName + '/' + fileOutputCrawl
        df['url'].to_csv(storePathUrl, header=False, index=False, sep='\n')
    return 'Success'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment