Created
June 9, 2021 03:55
-
-
Save brevityinmotion/255b5b11a1a880ebfab1ef00906d348c to your computer and use it in GitHub Desktop.
Function to run GoSpider and then process the output
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def generateScriptGoSpider(programName, inputBucketName):
    """Build the GoSpider crawl shell script for a program and upload it.

    The generated bash script runs ``gospider`` against
    ``$HOME/security/inputs/<programName>/<programName>-urls-base.csv``,
    extracts base and full URLs from the crawl output with ``grep``/``anew``
    into ``$HOME/security/refined/<programName>/``, writes one
    ``urls-simple-<file>.txt`` per crawl output file, and finally invokes
    ``sync-<programName>.sh``.

    Args:
        programName: Program identifier; interpolated into every path of the
            generated script and into the uploaded object's key.
        inputBucketName: Bucket name passed through to
            ``brevitycore.upload_object``.

    Returns:
        Whatever ``brevitycore.upload_object`` returns for the upload of
        ``run/<programName>/crawl-<programName>.sh``.
    """
    # NOTE(review): programName is interpolated into the shell script without
    # quoting or escaping -- callers must pass a trusted, shell-safe value.
    #
    # The script lines are kept at column 0 so the shebang is the very first
    # bytes of the uploaded file. `mkdir -p` is used so that re-running the
    # script for an existing program (or with a missing parent directory)
    # does not error out. `${{...}}` is an escaped brace pair that renders as
    # the bash expansion `${f##*/}` (basename of the crawl file).
    fileContents = f"""#!/bin/bash
# Run custom goSpider script
export HOME=/root
export PATH=/root/go/bin:$PATH
mkdir -p $HOME/security/refined/{programName}
mkdir -p $HOME/security/refined/{programName}/crawl
gospider -S $HOME/security/inputs/{programName}/{programName}-urls-base.csv -o $HOME/security/raw/{programName}/crawl -u web -t 1 -c 5 -d 1 --js --sitemap --robots --other-source --include-subs --include-other-source
cd $HOME/security/raw/{programName}/
# Generate list of base urls
cat crawl/* | grep -Eo '(http|https)://[^/?:&"]+' | anew > $HOME/security/refined/{programName}/{programName}-urls-min.txt
sleep 20
# Generate list of full urls
cat crawl/* | grep -Eo '(http|https)://[^*]+' | anew > $HOME/security/refined/{programName}/{programName}-urls-max.txt
sleep 20
FILES=$HOME/security/raw/{programName}/crawl/*
for f in $FILES
do
cat $f | grep -Eo '(http|https)://[^/?:&"]+' | anew > $HOME/security/refined/{programName}/crawl/urls-simple-${{f##*/}}.txt
done
sleep 10
sh $HOME/security/run/{programName}/sync-{programName}.sh"""

    # Upload file to S3
    object_name = 'crawl-' + programName + '.sh'
    object_path = 'run/' + programName + '/' + object_name

    # Encode straight into a byte buffer; the context manager guarantees the
    # buffer is closed even if the upload raises.
    with io.BytesIO(fileContents.encode()) as objectBuffer:
        status = brevitycore.upload_object(objectBuffer, inputBucketName, object_path)
    return status
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment