Created
January 22, 2011 18:07
-
-
Save xqus/791307 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
Copyright 2011 Audun Larsen. All rights reserved. | |
[email protected] | |
Redistribution and use, with or without modification, | |
are permitted provided that the following condition is met: | |
* Redistribution and use of source code must retain the above copyright notice, | |
this list of conditions and the following disclaimer. | |
THIS SOFTWARE IS PROVIDED BY ``AS IS'' | |
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY TYPE OF | |
DAMAGE ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE. | |
*/ | |
require 'Snoopy.class.php'; | |
/*
 * A deliberately simple crawler.
 *
 * $quee holds the URIs that are still waiting to be crawled (seeded with the
 * start pages below); $crawled collects every URI that has been taken off the
 * queue. Both are read and modified by addQuee() through `global`.
 */
$quee = array(
    'http://www.joomlainorge.no',
    'http://www.google.no/search?hl=no&client=opera&hs=4k2&rls=en&channel=suggest&q=inurl%3Akommune.no&aq=f&aqi=&aql=&oq=',
);

/* All the URIs we have crawled will be in this */
$crawled = array();

/*
 * Process the queue front to back until it is empty.
 *
 * The head of the queue is read with reset()/key() rather than each():
 * each() was deprecated in PHP 7.2 and removed in PHP 8.0. Entries are
 * removed with unset() and added with $quee[], so integer keys keep
 * increasing and $key still equals the number of pages crawled so far.
 */
while (!empty($quee)) {
    reset($quee);
    $key = key($quee);
    $uri = $quee[$key];

    $crawled[] = $uri;
    unset($quee[$key]);

    echo "Crawling $uri. (Crawled $key pages. ".sizeof($quee)." in quee.)\n";

    /* Collect the links on this page and queue the ones that mention kommune.no */
    $snoopy = new Snoopy;
    $snoopy->fetchlinks($uri);
    $links = $snoopy->results;

    /* NOTE(review): results is presumably an array of links on success; skip
       the loop when it is anything else (e.g. a failed fetch) - confirm
       against the bundled Snoopy version. */
    if (is_array($links)) {
        foreach ($links as $link) {
            if (is_string($link) && strpos($link, 'kommune.no') !== false) {
                addQuee(expandUri($link, $uri));
            }
        }
    }

    /* Now, check if site uses Joomla */
    /* First, get the base URL */
    $url = parse_url($uri);

    if (is_array($url) && isset($url['scheme'], $url['host'])) {
        /* Request the front page with tp=1 and look for the module-preview marker */
        $testUrl = $url['scheme'].'://'.$url['host'].'/index.php?tp=1';

        $snoopy = new Snoopy;
        $snoopy->fetch($testUrl);
        $text = $snoopy->results;

        if (is_string($text) && strpos($text, '<div class="mod-preview-info">') !== false) {
            echo "Joomla!!!\n\n";
        } else {
            echo "Not joomla.\n\n";
        }
    } else {
        /* parse_url() could not provide a scheme and host, so there is no base URL to probe */
        echo "Skipping Joomla check, no base URL.\n\n";
    }

    /* Pause between pages so the crawled hosts are not hammered */
    sleep(3);
}
/**
 * Add a URI to the crawl queue unless it is already known.
 *
 * A URI is skipped when it is already present in the global $crawled list or
 * in the global $quee list; otherwise it is appended to $quee. The lookups
 * use strict comparison so that two different numeric-looking strings
 * (e.g. '1e3' and '1000') are never mistaken for the same entry.
 *
 * @param string $uri URI to enqueue.
 * @return void
 */
function addQuee($uri) {
    global $crawled, $quee;

    if (in_array($uri, $crawled, true) || in_array($uri, $quee, true)) {
        return;
    }

    $quee[] = $uri;
}
/**
 * Placeholder for turning a link found on a page into a full URI.
 *
 * Currently a pass-through: the link is returned unchanged and $base is not
 * consulted.
 *
 * @param string $uri  Link as found on the page.
 * @param string $base URI of the page the link was found on (unused).
 * @return string The link, unmodified.
 */
function expandUri($uri, $base) {
    $expanded = $uri;

    return $expanded;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment