Created
April 16, 2014 06:15
-
-
Save tecknoh19/10815360 to your computer and use it in GitHub Desktop.
Python Google Dork Hacking Database Extraction Tool. Script will crawl exploit-db.com and copy the google dork database into a csv file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# Exploit-db.com Google Dork Hacking Database Replicator written by Andy Bricker
# Proof of concept. You shouldn't use this script without prior consent from Exploit-db.com
# http://andybricker.com
# Contact: andy at andybricker.com
# Requirements
# Python 2.7 (Has not been tested on later versions)
# Usage:
# python ghdb_ripper.py 50 -s 51 -o output.txt
# Script will crawl exploit-db.com google dork pages and build a csv output file containing line by line
# dork,date dork was added,dork description
# Like the script? Donate
# LiteCoin: LcFU5upJyS7FsEeB5sb25vFTS69dH6fugr
# DogeCoin: D7SPH1LYJn9Co4GCZePH3JvzR5RkZEPi5M
import csv
import os
import re
import time
import urllib2
from optparse import OptionParser
# Exploit-db serves one GHDB entry per numbered page; %d is the entry number.
GHDB_URL = 'http://www.exploit-db.com/ghdb/%d/'
USER_AGENT = ('Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.1.5) '
              'Gecko/20091102 Firefox/3.5.5')
REQUEST_TIMEOUT = 6          # seconds allowed for each page request
MAX_FAILED_ATTEMPTS = 3      # consecutive connection failures before giving up

# Compiled once instead of on every page. "Submited" is spelled the way the
# pattern has always matched it; do not "correct" it here.
DORK_RE = re.compile(r'<h1>(.*?)</h1>')
DATE_RE = re.compile(r'<p>Submited: (.*?)</p>')
DESC_RE = re.compile(r'<p class="text">(.*?)</p>')


def build_option_parser():
    """Return the command-line parser (one positional arg: number_of_dorks)."""
    parser = OptionParser(usage='%prog number_of_dorks [options]', description='Exploit-db.Com GHDB Database Replicator')
    # %default keeps the help text in sync with the actual default value.
    parser.add_option('-s', '--start_number', type='int', default=51, help='Dork number to start with (default: %default)')
    parser.add_option('-o', '--output_file', type='string', default="output.txt", help='Name of the output file. Paths accepted. User must have access to output path. (default: %default)')
    return parser


def page_numbers(start_number, number_of_dorks):
    """Return the GHDB page numbers to crawl.

    Yields ``number_of_dorks`` consecutive page numbers beginning at
    ``start_number`` (an empty range when the count is zero or negative).
    """
    return range(start_number, start_number + number_of_dorks)


def parse_dork_page(html):
    """Extract ``(dork, date_added, description)`` from a GHDB page.

    Returns None when the page has no ``<h1>`` dork title. A missing date
    falls back to "0000-00-00" and a missing description to "na".
    """
    dork = DORK_RE.search(html)
    if dork is None:
        return None
    date_added = DATE_RE.search(html)
    dork_desc = DESC_RE.search(html)
    return (
        dork.group(1),
        date_added.group(1) if date_added else "0000-00-00",
        dork_desc.group(1) if dork_desc else "na",
    )


def fetch_page(url):
    """Download ``url`` with a browser User-agent and return the body.

    Any error raised by urllib2 while opening or reading propagates to the
    caller; the response is closed in every case.
    """
    request = urllib2.Request(url)
    request.add_header('User-agent', USER_AGENT)
    response = urllib2.urlopen(request, timeout=REQUEST_TIMEOUT)
    try:
        return response.read()
    finally:
        response.close()


def clear_screen():
    """Clear the terminal (and switch to green text on Windows)."""
    if os.name == 'nt':
        os.system('color a')
        os.system('cls')
    else:
        os.system('clear')


def main():
    """Crawl the requested GHDB pages and append one CSV row per dork."""
    parser = build_option_parser()
    opts, args = parser.parse_args()
    if len(args) < 1:
        parser.print_help()
        return
    try:
        number_of_dorks = int(args[0])
    except ValueError:
        # parser.error() prints the usage line and exits with status 2.
        parser.error('number_of_dorks must be an integer, got %r' % args[0])

    clear_screen()

    failed_attempts = 0
    # Binary append mode is what the Python 2 csv module expects.
    with open(opts.output_file, "ab") as log_file:
        # The csv writer quotes fields that contain commas or quotes, which
        # dorks and descriptions frequently do. "\n" keeps the line ending
        # the output file has always used.
        writer = csv.writer(log_file, lineterminator="\n")
        for page in page_numbers(opts.start_number, number_of_dorks):
            url = GHDB_URL % page
            print("Grabbing " + url)
            print("========================================================================")
            try:
                search_content = fetch_page(url)
            except Exception:
                # Retry boundary: any fetch error counts as a failed attempt,
                # but Ctrl-C / SystemExit are no longer swallowed.
                print("Connection interrupted. Waiting 5 Seconds.")
                failed_attempts += 1
                if failed_attempts >= MAX_FAILED_ATTEMPTS:
                    print("Connection lost. Exiting.")
                    return
                time.sleep(5)
                continue

            print("Checking response")
            row = parse_dork_page(search_content)
            if row is None:
                # The page loaded but did not contain a dork title.
                print("Communication error. Waiting 3 seconds.")
                time.sleep(3)
                continue
            writer.writerow(row)
            failed_attempts = 0


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Well, considering I'm code-ly challenged, any way you could scribble me an example of how to pull up a list of SWF files around the web? (I'm looking for Flash games and don't wanna use files 2 hd for hours and hours.) Fire me an email if you'd like. Also, if you understand JS and CSS and whatever else is needed to create an online browser game (like Plinko at faucetgame.com/plinko), I may have a fairly lucrative job for you.
anyways hit me up at [email protected]