brevityinmotion · July 28, 2021 04:36
diff --git a/brevity-recon-httpx.py b/brevity-recon-httpx.py
 import io, json
 import brevitycore.core

 def prepareHttpx(programName,inputBucketName, fileName):
    """Pick the httpx input list for a trigger file and generate the run script.

    Args:
        programName: Program identifier used to build the input file names.
        inputBucketName: Bucket name handed through to generateScriptHttpx.
        fileName: Name of the file that triggered this run. Only
            '<programName>-domains-new.csv' selects the initial operation.

    Returns:
        Whatever generateScriptHttpx returns for the chosen input/output pair.
    """
    inputDir = '$HOME/security/inputs/' + programName + '/'

    if fileName == programName + '-domains-new.csv':
        # Initial operation: triggered by the new-domains file, but httpx is
        # pointed at the full domain list.
        inputPath = inputDir + programName + '-domains-all.csv'
        outputPath = 'initial'
    else:
        # Anything other than initial defaults to the crawl URL list. The two
        # independent `if` checks used before left inputPath/outputPath unbound
        # (UnboundLocalError) whenever fileName matched neither name.
        inputPath = inputDir + programName + '-urls-mod.txt'
        outputPath = 'crawl'

    return generateScriptHttpx(programName, inputBucketName, inputPath, outputPath)

 def generateScriptHttpx(programName, inputBucketName, inputPath, outputPath):
    """Render the httpx run script for a program and upload it to the input bucket.

    The generated bash script creates the working directories, runs httpx
    against ``inputPath`` when that file exists, strips the ``.txt`` suffix
    from the stored responses, and then invokes the program's sync and
    stepfunctions scripts.

    Directories are created with ``mkdir -p`` so that a re-run, or a missing
    ``raw/<programName>`` parent, does not make mkdir fail. The directory that
    httpx is told to write its JSON output into
    (``presentation/<programName>/httpx-json``) is created as well; the
    earlier script only created ``presentation/httpx-json``.

    Args:
        programName: Program identifier interpolated into every path.
        inputBucketName: Bucket the script is uploaded to.
        inputPath: Path (on the instance) of the list passed to ``httpx -l``.
        outputPath: Operation label used in the JSON output file name.

    Returns:
        The value returned by brevitycore.core.upload_object.
    """
    fileContents = f"""#!/bin/bash

 # Run custom httpx script
 export HOME=/root
 export PATH=/root/go/bin:$PATH

 mkdir -p $HOME/security/raw/{programName}/responses
 mkdir -p $HOME/security/raw/{programName}/httpx
 mkdir -p $HOME/security/presentation
 mkdir -p $HOME/security/presentation/httpx-json
 mkdir -p $HOME/security/presentation/{programName}/httpx-json

 export HTTPXINPUTPATH={inputPath}

 if [ -f "$HTTPXINPUTPATH" ]; then
    httpx -json -o $HOME/security/presentation/{programName}/httpx-json/{programName}-httpx-{outputPath}.json -l {inputPath} -status-code -title -location -content-type -web-server -no-color -tls-probe -x GET -ip -cname -cdn -content-length -sr -srd $HOME/security/raw/{programName}/responses -timeout 1
 fi
 sleep 10
 # Remove .txt from all of the files
 echo 'Completed crawl'
 cd $HOME/security/raw/{programName}/responses/
 for f in *.txt; do mv -- "$f" "${{f%.txt}}"; done
 sleep 10
 cd /root/security/raw/{programName}/
 # tar -cvzf {programName}-responses.tar.gz responses
 sleep 10
 # mv $HOME/security/raw/{programName}/{programName}-responses.tar.gz $HOME/security/refined/{programName}/{programName}-responses.tar.gz
 # rm -r responses
 # sleep 10
 #aws s3 cp $HOME/security/raw/{programName}/responses/ s3://brevity-raw/responses/{programName}/
 #aws s3 cp $HOME/security/raw/{programName}/httpx/ s3://brevity-raw/httpx/{programName}/
 sh $HOME/security/run/{programName}/sync-{programName}.sh
 wait
 sh $HOME/security/run/{programName}/stepfunctions-{programName}.sh"""

    # Upload file to S3
    object_name = 'httpx-' + programName + '.sh'
    object_path = 'run/' + programName + '/' + object_name
    # The context manager closes the buffer even when the upload raises.
    with io.BytesIO(fileContents.encode()) as objectBuffer:
        status = brevitycore.core.upload_object(objectBuffer,inputBucketName,object_path)
    return status

 def generateInstallScriptHttpx(inputBucketName):
    """Render the instance bootstrap script for httpx and upload it to the bucket.

    The generated bash script creates the ``$HOME/security`` directory tree,
    installs git, python3, pip, golang and the aws cli through apt, appends the
    Go environment variables to ``~/.bashrc`` and installs httpx with
    ``go get``. Directories are created with ``mkdir -p`` so running the
    script a second time does not emit "File exists" errors.

    NOTE(review): the earlier version fetched the 'brevity-aws-recon' secret
    and parsed the AWS key pair here, but nothing in the script used them, so
    the lookup was removed. If the instance is supposed to receive credentials
    from this script, that step has to be added explicitly.

    Args:
        inputBucketName: Bucket the script is uploaded to.

    Returns:
        The value returned by brevitycore.core.upload_object.
    """
    # The f-prefix is kept from the original; the script has no placeholders.
    fileContents = f"""#!/bin/bash

 # Create directory structure
 export HOME=/root
 mkdir -p $HOME/security
 mkdir -p $HOME/security/tools
 mkdir -p $HOME/security/tools/amass
 mkdir -p $HOME/security/tools/amass/db
 mkdir -p $HOME/security/tools/hakrawler
 mkdir -p $HOME/security/tools/httpx
 mkdir -p $HOME/security/raw
 mkdir -p $HOME/security/refined
 mkdir -p $HOME/security/presentation
 mkdir -p $HOME/security/presentation/httpx
 mkdir -p $HOME/security/presentation/httpx-json
 mkdir -p $HOME/security/scope
 mkdir -p $HOME/security/install
 mkdir -p $HOME/security/config
 mkdir -p $HOME/security/run
 mkdir -p $HOME/security/inputs

 # Update apt repositories to avoid software installation issues
 apt-get update

 # Ensure OS and packages are fully upgraded
 #apt-get -y upgrade

 # Install Git
 apt-get install -y git # May already be installed

 # Install Python and Pip
 apt-get install -y python3 # Likely is already installed
 apt-get install -y python3-pip

 # Install Golang via cli
 apt-get install -y golang

 echo 'export GOROOT=/usr/lib/go' >> ~/.bashrc
 echo 'export GOPATH=$HOME/go' >> ~/.bashrc
 echo 'export PATH=$GOPATH/bin:$GOROOT/bin:$PATH' >> ~/.bashrc
 #source ~/.bashrc
    
 # Install aws cli
 apt-get install -y awscli

 # Install go tools
 GO111MODULE=on go get -v github.com/projectdiscovery/httpx/cmd/httpx"""

    # Upload file to S3
    object_name = 'bounty-startup-httpx.sh'
    object_path = 'config/' + object_name
    # The context manager closes the buffer even when the upload raises.
    with io.BytesIO(fileContents.encode()) as objectBuffer:
        installScriptStatus = brevitycore.core.upload_object(objectBuffer,inputBucketName,object_path)
    return installScriptStatus

 def processHttpx(programName, refinedBucketPath, inputBucketPath, presentationBucketPath, operationName, programInputBucketPath):
    """Post-process an httpx JSON-lines result and merge it into the program output.

    Reads ``<presentationBucketPath>httpx-json/<programName>-httpx-<operationName>.json``,
    tags every row with the program name, derives ``domain`` and ``baseurl``
    from ``url``, and merges the rows into
    ``<presentationBucketPath>httpx/<programName>-httpx.json`` (de-duplicated on
    ``url``, newest row wins). For the 'initial' operation it additionally
    writes the raw rows, the unique URLs, and the merged URL list as
    newline-separated files.

    All paths are built by plain string concatenation, so every ``*BucketPath``
    argument must already end with a path separator.

    Args:
        programName: Program identifier used in file names and the 'program' column.
        refinedBucketPath: Unused; kept for interface compatibility.
        inputBucketPath: Base path for the merged URL list ('initial' only).
        presentationBucketPath: Base path of the httpx-json input and httpx output.
        operationName: Operation label in the input file name; 'initial'
            enables the extra CSV outputs.
        programInputBucketPath: Base path for the per-program CSV outputs
            ('initial' only).

    Returns:
        The string 'Success'.
    """
    # Imported locally: the module's visible import block does not bring in
    # pandas, so the bare `pd` references would raise NameError.
    import pandas as pd
    from urllib.parse import urlparse

    def _parseUrlRoot(urlvalue):
        # Host (and port, if any) of the URL.
        return urlparse(urlvalue).netloc

    def _parseUrlBase(urlvalue):
        # URL without query string or fragment.
        parsed = urlparse(urlvalue)
        return parsed.scheme + '://' + parsed.netloc + parsed.path

    fileName = programName + '-httpx-' + operationName + '.json'
    presentationFilePath = presentationBucketPath + 'httpx-json/' + fileName

    df = pd.read_json(presentationFilePath, lines=True)
    df['program'] = programName

    if operationName == 'initial':
        storePathUrl = programInputBucketPath + programName + '/' + programName + '-httpx.csv'
        df.to_csv(storePathUrl, header=False, index=False, sep='\n')

        storePathUrl = programInputBucketPath + programName + '/' + programName + '-urls-base.csv'
        dfUrls = df.drop_duplicates(subset=['url'])
        dfUrls['url'].to_csv(storePathUrl, header=False, index=False, sep='\n')

    df['domain'] = df['url'].apply(_parseUrlRoot)
    df['baseurl'] = df['url'].apply(_parseUrlBase)

    fileOutputName = programName + '-httpx.json'
    outputPath = presentationBucketPath + 'httpx/' + fileOutputName

    # Merge with any existing output so that it is not overwritten. Only the
    # read is best-effort: with the old bare `except:` around the whole merge,
    # the AttributeError from DataFrame.append (removed in pandas 2.0) was
    # swallowed and the existing rows were silently replaced by the new ones.
    try:
        dfInitialHttpx = pd.read_json(outputPath, lines=True)
    except Exception:
        print('No initial httpx output')
    else:
        df = pd.concat([dfInitialHttpx, df])
        df = df.drop_duplicates(subset=['url'], keep='last')

    df.to_json(outputPath, orient='records', lines=True)

    if operationName == 'initial':
        # NOTE(review): the URL list is written under the '-httpx.json' name;
        # an unused '<programName>-httpx-crawl.csv' name was defined here
        # before. The path is left unchanged because downstream consumers may
        # depend on it - confirm which name is intended.
        storePathUrl = inputBucketPath + programName + '/' + fileOutputName
        df['url'].to_csv(storePathUrl, header=False, index=False, sep='\n')
    return 'Success'
	import io, json
	import brevitycore.core

	def prepareHttpx(programName,inputBucketName, fileName):
		"""Pick the httpx input list for a trigger file and generate the run script.

		Args:
			programName: Program identifier used to build the input file names.
			inputBucketName: Bucket name handed through to generateScriptHttpx.
			fileName: Name of the file that triggered this run. Only
				'<programName>-domains-new.csv' selects the initial operation.

		Returns:
			Whatever generateScriptHttpx returns for the chosen input/output pair.
		"""
		inputDir = '$HOME/security/inputs/' + programName + '/'

		if fileName == programName + '-domains-new.csv':
			# Initial operation: triggered by the new-domains file, but httpx is
			# pointed at the full domain list.
			inputPath = inputDir + programName + '-domains-all.csv'
			outputPath = 'initial'
		else:
			# Anything other than initial defaults to the crawl URL list. Two
			# independent `if` checks would leave inputPath/outputPath unbound
			# (UnboundLocalError) whenever fileName matched neither name.
			inputPath = inputDir + programName + '-urls-mod.txt'
			outputPath = 'crawl'

		return generateScriptHttpx(programName, inputBucketName, inputPath, outputPath)

	def generateScriptHttpx(programName, inputBucketName, inputPath, outputPath):
		"""Render the httpx run script for a program and upload it to the input bucket.

		The generated bash script creates the working directories, runs httpx
		against ``inputPath`` when that file exists, strips the ``.txt`` suffix
		from the stored responses, and then invokes the program's sync and
		stepfunctions scripts.

		Directories are created with ``mkdir -p`` so that a re-run, or a missing
		``raw/<programName>`` parent, does not make mkdir fail. The directory that
		httpx is told to write its JSON output into
		(``presentation/<programName>/httpx-json``) is created as well; the
		earlier script only created ``presentation/httpx-json``.

		Args:
			programName: Program identifier interpolated into every path.
			inputBucketName: Bucket the script is uploaded to.
			inputPath: Path (on the instance) of the list passed to ``httpx -l``.
			outputPath: Operation label used in the JSON output file name.

		Returns:
			The value returned by brevitycore.core.upload_object.
		"""
		fileContents = f"""#!/bin/bash

	# Run custom httpx script
	export HOME=/root
	export PATH=/root/go/bin:$PATH

	mkdir -p $HOME/security/raw/{programName}/responses
	mkdir -p $HOME/security/raw/{programName}/httpx
	mkdir -p $HOME/security/presentation
	mkdir -p $HOME/security/presentation/httpx-json
	mkdir -p $HOME/security/presentation/{programName}/httpx-json

	export HTTPXINPUTPATH={inputPath}

	if [ -f "$HTTPXINPUTPATH" ]; then
	httpx -json -o $HOME/security/presentation/{programName}/httpx-json/{programName}-httpx-{outputPath}.json -l {inputPath} -status-code -title -location -content-type -web-server -no-color -tls-probe -x GET -ip -cname -cdn -content-length -sr -srd $HOME/security/raw/{programName}/responses -timeout 1
	fi
	sleep 10
	# Remove .txt from all of the files
	echo 'Completed crawl'
	cd $HOME/security/raw/{programName}/responses/
	for f in *.txt; do mv -- "$f" "${{f%.txt}}"; done
	sleep 10
	cd /root/security/raw/{programName}/
	# tar -cvzf {programName}-responses.tar.gz responses
	sleep 10
	# mv $HOME/security/raw/{programName}/{programName}-responses.tar.gz $HOME/security/refined/{programName}/{programName}-responses.tar.gz
	# rm -r responses
	# sleep 10
	#aws s3 cp $HOME/security/raw/{programName}/responses/ s3://brevity-raw/responses/{programName}/
	#aws s3 cp $HOME/security/raw/{programName}/httpx/ s3://brevity-raw/httpx/{programName}/
	sh $HOME/security/run/{programName}/sync-{programName}.sh
	wait
	sh $HOME/security/run/{programName}/stepfunctions-{programName}.sh"""

		# Upload file to S3
		object_name = 'httpx-' + programName + '.sh'
		object_path = 'run/' + programName + '/' + object_name
		# The context manager closes the buffer even when the upload raises.
		with io.BytesIO(fileContents.encode()) as objectBuffer:
			status = brevitycore.core.upload_object(objectBuffer,inputBucketName,object_path)
		return status

	def generateInstallScriptHttpx(inputBucketName):
		"""Render the instance bootstrap script for httpx and upload it to the bucket.

		The generated bash script creates the ``$HOME/security`` directory tree,
		installs git, python3, pip, golang and the aws cli through apt, appends the
		Go environment variables to ``~/.bashrc`` and installs httpx with
		``go get``. Directories are created with ``mkdir -p`` so running the
		script a second time does not emit "File exists" errors.

		NOTE(review): the earlier version fetched the 'brevity-aws-recon' secret
		and parsed the AWS key pair here, but nothing in the script used them, so
		the lookup was removed. If the instance is supposed to receive credentials
		from this script, that step has to be added explicitly.

		Args:
			inputBucketName: Bucket the script is uploaded to.

		Returns:
			The value returned by brevitycore.core.upload_object.
		"""
		# The f-prefix is kept from the original; the script has no placeholders.
		fileContents = f"""#!/bin/bash

	# Create directory structure
	export HOME=/root
	mkdir -p $HOME/security
	mkdir -p $HOME/security/tools
	mkdir -p $HOME/security/tools/amass
	mkdir -p $HOME/security/tools/amass/db
	mkdir -p $HOME/security/tools/hakrawler
	mkdir -p $HOME/security/tools/httpx
	mkdir -p $HOME/security/raw
	mkdir -p $HOME/security/refined
	mkdir -p $HOME/security/presentation
	mkdir -p $HOME/security/presentation/httpx
	mkdir -p $HOME/security/presentation/httpx-json
	mkdir -p $HOME/security/scope
	mkdir -p $HOME/security/install
	mkdir -p $HOME/security/config
	mkdir -p $HOME/security/run
	mkdir -p $HOME/security/inputs

	# Update apt repositories to avoid software installation issues
	apt-get update

	# Ensure OS and packages are fully upgraded
	#apt-get -y upgrade

	# Install Git
	apt-get install -y git # May already be installed

	# Install Python and Pip
	apt-get install -y python3 # Likely is already installed
	apt-get install -y python3-pip

	# Install Golang via cli
	apt-get install -y golang

	echo 'export GOROOT=/usr/lib/go' >> ~/.bashrc
	echo 'export GOPATH=$HOME/go' >> ~/.bashrc
	echo 'export PATH=$GOPATH/bin:$GOROOT/bin:$PATH' >> ~/.bashrc
	#source ~/.bashrc

	# Install aws cli
	apt-get install -y awscli

	# Install go tools
	GO111MODULE=on go get -v github.com/projectdiscovery/httpx/cmd/httpx"""

		# Upload file to S3
		object_name = 'bounty-startup-httpx.sh'
		object_path = 'config/' + object_name
		# The context manager closes the buffer even when the upload raises.
		with io.BytesIO(fileContents.encode()) as objectBuffer:
			installScriptStatus = brevitycore.core.upload_object(objectBuffer,inputBucketName,object_path)
		return installScriptStatus

	def processHttpx(programName, refinedBucketPath, inputBucketPath, presentationBucketPath, operationName, programInputBucketPath):
		"""Post-process an httpx JSON-lines result and merge it into the program output.

		Reads ``<presentationBucketPath>httpx-json/<programName>-httpx-<operationName>.json``,
		tags every row with the program name, derives ``domain`` and ``baseurl``
		from ``url``, and merges the rows into
		``<presentationBucketPath>httpx/<programName>-httpx.json`` (de-duplicated on
		``url``, newest row wins). For the 'initial' operation it additionally
		writes the raw rows, the unique URLs, and the merged URL list as
		newline-separated files.

		All paths are built by plain string concatenation, so every ``*BucketPath``
		argument must already end with a path separator.

		Args:
			programName: Program identifier used in file names and the 'program' column.
			refinedBucketPath: Unused; kept for interface compatibility.
			inputBucketPath: Base path for the merged URL list ('initial' only).
			presentationBucketPath: Base path of the httpx-json input and httpx output.
			operationName: Operation label in the input file name; 'initial'
				enables the extra CSV outputs.
			programInputBucketPath: Base path for the per-program CSV outputs
				('initial' only).

		Returns:
			The string 'Success'.
		"""
		# Imported locally: the module's visible import block does not bring in
		# pandas, so the bare `pd` references would raise NameError.
		import pandas as pd
		from urllib.parse import urlparse

		def _parseUrlRoot(urlvalue):
			# Host (and port, if any) of the URL.
			return urlparse(urlvalue).netloc

		def _parseUrlBase(urlvalue):
			# URL without query string or fragment.
			parsed = urlparse(urlvalue)
			return parsed.scheme + '://' + parsed.netloc + parsed.path

		fileName = programName + '-httpx-' + operationName + '.json'
		presentationFilePath = presentationBucketPath + 'httpx-json/' + fileName

		df = pd.read_json(presentationFilePath, lines=True)
		df['program'] = programName

		if operationName == 'initial':
			storePathUrl = programInputBucketPath + programName + '/' + programName + '-httpx.csv'
			df.to_csv(storePathUrl, header=False, index=False, sep='\n')

			storePathUrl = programInputBucketPath + programName + '/' + programName + '-urls-base.csv'
			dfUrls = df.drop_duplicates(subset=['url'])
			dfUrls['url'].to_csv(storePathUrl, header=False, index=False, sep='\n')

		df['domain'] = df['url'].apply(_parseUrlRoot)
		df['baseurl'] = df['url'].apply(_parseUrlBase)

		fileOutputName = programName + '-httpx.json'
		outputPath = presentationBucketPath + 'httpx/' + fileOutputName

		# Merge with any existing output so that it is not overwritten. Only the
		# read is best-effort: with a bare `except:` around the whole merge, the
		# AttributeError from DataFrame.append (removed in pandas 2.0) is
		# swallowed and the existing rows are silently replaced by the new ones.
		try:
			dfInitialHttpx = pd.read_json(outputPath, lines=True)
		except Exception:
			print('No initial httpx output')
		else:
			df = pd.concat([dfInitialHttpx, df])
			df = df.drop_duplicates(subset=['url'], keep='last')

		df.to_json(outputPath, orient='records', lines=True)

		if operationName == 'initial':
			# NOTE(review): the URL list is written under the '-httpx.json' name;
			# an unused '<programName>-httpx-crawl.csv' name was defined here
			# before. The path is left unchanged because downstream consumers may
			# depend on it - confirm which name is intended.
			storePathUrl = inputBucketPath + programName + '/' + fileOutputName
			df['url'].to_csv(storePathUrl, header=False, index=False, sep='\n')
		return 'Success'