Last active
March 3, 2018 15:23
-
-
Save NanoDano/10330026 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/php | |
<?php | |
/** | |
* filename: ghdb | |
* | |
* usage: ghdb [info|list|update] | |
* | |
* Crude example of PHP HTML DOM and SQLite combination to archive | |
* data from a website. Posted as reference. | |
* | |
* Crawls http://www.exploit-db.com/google-dorks/ and stores the | |
* dork, comment, and id to a SQLite file. It accepts arguments list, info, update | |
* | |
* Dependent on http://simplehtmldom.sourceforge.net/ | |
* http://downloads.sourceforge.net/project/simplehtmldom/simple_html_dom.php | |
* | |
* Last updated: March 2014 - [email protected] | |
* | |
*/ | |
require_once("simple_html_dom.php"); | |
// Location of the SQLite database file. The main section of this script passes
// this value to init_db(), so init_db()'s own default ("db/dorks.db") is only
// used when init_db() is called without a filename.
$db_file = "dorks.db"; | |
/**
 * Open the SQLite database that stores the dorks and make sure the `dorks`
 * table (id, dork, category) exists.
 *
 * @param string $db_filename Path of the SQLite database file.
 * @return PDO Connection to the opened database.
 */
function init_db($db_filename = "db/dorks.db")
{
    // Schema is created lazily so a fresh database file works out of the box
    $schema = "CREATE TABLE IF NOT EXISTS dorks(id integer PRIMARY KEY,"
        . "dork text, category text)";

    $connection = new PDO("sqlite:{$db_filename}");
    $connection->exec($schema);

    return $connection;
}
/**
 * Return the highest dork ID stored in the local database.
 *
 * Terminates the script via die() when no database handle is supplied.
 *
 * @param PDO|null $db Connection to a database containing the `dorks` table.
 * @return int Highest stored ID, or 0 when the table holds no rows.
 */
function get_highest_db_id($db = null)
{
    if (!$db) {
        die("get_highest_db_id() - No database provided.");
    }

    // MAX() returns a single value instead of sorting and fetching from every row
    $stmt = $db->prepare("SELECT MAX(id) FROM dorks");
    $stmt->execute();
    $highest = $stmt->fetchColumn();

    // MAX() is NULL on an empty table; fetchColumn() is false when no row came back
    if ($highest === null || $highest === false) {
        return 0;
    }

    return (int) $highest;
}
/**
 * Find the highest GHDB entry ID linked from a parsed page.
 *
 * Looks at every element returned by find('td a') and reads its href. Links
 * whose href contains "/ghdb/<digits>" contribute an ID; the trailing slash
 * is optional and anything after the digits is ignored.
 *
 * @param object $html Parsed document exposing find(); typically the
 *                     simple_html_dom object built from the landing page.
 * @return int|null Highest ID found, or null when no such link exists.
 */
function get_newest_id($html)
{
    $newest = null;

    foreach ($html->find('td a') as $link) {
        // Only links that point at a /ghdb/<id> page carry an entry ID
        if (preg_match('~/ghdb/(\d+)~', (string) $link->href, $match) !== 1) {
            continue;
        }

        // Compare as integers so "1000" ranks above "999"
        $id = (int) $match[1];
        if ($newest === null || $id > $newest) {
            $newest = $id;
        }
    }

    return $newest;
}
/**
 * Collect the dork categories linked from a parsed page.
 *
 * Every element returned by find('a') whose href contains
 * "/google-dorks/<id>" is treated as a category link. The ID is the first
 * path segment after "/google-dorks/", so it is extracted correctly whether
 * or not the href ends in a slash. Links to the bare "/google-dorks/" index
 * carry no ID and are skipped.
 *
 * @param object $html Parsed document exposing find(); typically the
 *                     simple_html_dom object built from the landing page.
 * @return array Map of category ID => link text (the element's plaintext).
 */
function get_categories($html)
{
    $categories = array();

    foreach ($html->find('a') as $link) {
        // Capture the segment after /google-dorks/ up to the next "/", "?" or "#"
        if (preg_match('~/google-dorks/([^/?#]+)~', (string) $link->href, $match) !== 1) {
            continue;
        }

        $categories[$match[1]] = $link->plaintext;
    }

    return $categories;
}
/**
 * Download every dork newer than the local database and store it.
 *
 * Requests http://www.exploit-db.com/ghdb/<id>/ for each ID from
 * $highest_db_id + 1 up to and including $newest_id. From each page the href
 * of the last 'h2 a' element is taken as the dork and the plaintext of the
 * last 'p.text' element as its comment. Pages that cannot be downloaded or
 * parsed are reported on STDERR and skipped; pages without a dork link are
 * skipped silently.
 *
 * @param PDO   $db            Connection passed through to add_dork().
 * @param mixed $highest_db_id Highest ID already stored locally.
 * @param mixed $newest_id     Highest ID available on the site.
 * @return void
 */
function get_dorks($db, $highest_db_id, $newest_id)
{
    if (!($newest_id > $highest_db_id)) {
        return; // local database is already up to date
    }

    for ($id = $highest_db_id + 1; $id <= $newest_id; $id++) {
        $url = 'http://www.exploit-db.com/ghdb/' . $id . '/';

        $raw = file_get_contents($url);
        if ($raw === false) {
            fwrite(STDERR, "Skipping dork $id - unable to download $url\n");
            continue;
        }

        // The landing page is served gzip-compressed; decompress entry pages
        // too when the body starts with the gzip magic bytes
        if (strncmp($raw, "\x1f\x8b", 2) === 0) {
            $raw = gzdecode($raw);
        }

        $dork_page = ($raw === false) ? false : str_get_html($raw);
        if (!$dork_page) {
            fwrite(STDERR, "Skipping dork $id - unable to parse $url\n");
            continue;
        }

        $dork_text = "";
        $comment = "";
        foreach ($dork_page->find('h2 a') as $a) {
            $dork_text = $a->href;
        }
        foreach ($dork_page->find('p.text') as $dork_comment) {
            $comment = $dork_comment->plaintext;
        }

        // Release the parsed DOM before fetching the next page so memory
        // does not grow across a long run
        $dork_page->clear();
        unset($dork_page);

        if (!empty($dork_text)) {
            echo "Adding dork: $id - $dork_text\n";
            add_dork($db, $id, $dork_text, $comment);
        }
    }
}
/**
 * Insert one dork into the `dorks` table.
 *
 * Values are bound as parameters, so quotes and other special characters in
 * the scraped text are stored verbatim instead of breaking (or injecting
 * into) the SQL statement.
 *
 * @param PDO    $db       Connection to the database holding the `dorks` table.
 * @param int    $id       GHDB entry ID (primary key).
 * @param string $dork     The dork itself.
 * @param string $category Text stored in the `category` column.
 * @return void
 */
function add_dork($db, $id, $dork, $category)
{
    $stmt = $db->prepare(
        "INSERT INTO dorks (id, dork, category) VALUES (:id, :dork, :category)"
    );
    if ($stmt === false) {
        // Connection is in a non-throwing error mode and the statement could
        // not be prepared; nothing can be inserted
        return;
    }

    $stmt->bindValue(':id', (int) $id, PDO::PARAM_INT);
    $stmt->bindValue(':dork', (string) $dork, PDO::PARAM_STR);
    $stmt->bindValue(':category', (string) $category, PDO::PARAM_STR);
    $stmt->execute();
}
/**
 * Print the stored dorks whose ID lies within [$start_id, $end_id].
 *
 * Each row is printed as "<id> - <dork>", followed by the category text on
 * the next line and a blank line. Rows are listed in ascending ID order.
 *
 * @param PDO $db       Connection to the database holding the `dorks` table.
 * @param int $start_id Lowest ID to print (inclusive).
 * @param int $end_id   Highest ID to print (inclusive).
 * @return void
 */
function list_dorks($db, $start_id = 1, $end_id = 1000000)
{
    $stmt = $db->prepare(
        "SELECT id, dork, category FROM dorks"
        . " WHERE id BETWEEN :start_id AND :end_id ORDER BY id"
    );
    $stmt->bindValue(':start_id', (int) $start_id, PDO::PARAM_INT);
    $stmt->bindValue(':end_id', (int) $end_id, PDO::PARAM_INT);
    $stmt->execute();

    foreach ($stmt as $row) {
        echo "{$row['id']} - {$row['dork']}\n{$row['category']}\n\n";
    }
}
/**
 * Print a one-line usage message.
 *
 * @param array|null $argv Command-line arguments; element 0 is shown as the
 *                         script name. When omitted, the name is taken from
 *                         $_SERVER['argv'] and falls back to "ghdb", so the
 *                         function can be called without arguments.
 * @return void
 */
function print_usage($argv = null)
{
    if (is_array($argv) && isset($argv[0])) {
        $script = $argv[0];
    } elseif (isset($_SERVER['argv'][0])) {
        $script = $_SERVER['argv'][0];
    } else {
        $script = 'ghdb';
    }

    echo "Usage: $script [info|list|update]\n";
}
///////////////// | |
// Main | |
///////////////// | |
// Print usage if no args supplied
if ($argc < 2) {
    print_usage($argv);
    die();
}

// Reject unknown commands before touching the database or the network
$command = $argv[1];
if (!in_array($command, array("update", "list", "info"), true)) {
    print_usage($argv);
    die();
}

// Prepare SQLite3 database
$db = init_db($db_file);

if ($command === "list") {
    // Listing only reads the local database, so no download is needed
    list_dorks($db);
} else {
    // "update" and "info" both need the landing page of the site
    $landing_url = 'http://www.exploit-db.com/google-dorks/';
    $raw = file_get_contents($landing_url);

    // Decompress only when the body really is gzip (magic bytes 1f 8b), so an
    // uncompressed response is parsed as-is
    if ($raw !== false && strncmp($raw, "\x1f\x8b", 2) === 0) {
        $raw = gzdecode($raw);
    }

    // Load page into simple_html_dom object
    $landing = ($raw === false) ? false : str_get_html($raw);
    if (!$landing) {
        fwrite(STDERR, "Unable to load $landing_url\n");
        exit(1);
    }

    $categories = get_categories($landing);
    $newest_id = get_newest_id($landing);
    $highest_db_id = get_highest_db_id($db);

    // Handle input
    switch ($command) {
        case "update":
            get_dorks($db, $highest_db_id, $newest_id); // Get latest dorks
            break;
        case "info":
            echo "Categories: "; print_r($categories); echo "\n";
            echo "Newest ID on GHDB: "; print_r($newest_id); echo "\n";
            echo "Highest DB ID: "; print_r($highest_db_id); echo "\n";
            break;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Nice
But if you want to become a Google dorks expert,
you need to watch this video -
here is the link => https://www.youtube.com/watch?v=B9xJ54aGppc & https://www.youtube.com/watch?v=VSZyeYnZk3c
What will you learn from this video?
*how Google dorks work
*how to use Google dorks for penetration testing
*how to use Google dorks for security searching
*how powerful Google dorks are
*how to find vulnerable websites using Google dorks
*how to find someone's information with Google dorks
*how to use Google dorks for information gathering
*how to exploit databases with Google dorks
*how to become an expert Google dorks searcher
*how to master Google dorks
A thank-you would be much appreciated :)