Last active
June 23, 2016 08:22
-
-
Save totya24/08d7ff81bbf0a588bced to your computer and use it in GitHub Desktop.
jQuery method usage statistic
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** included from: https://github.com/neitanod/forceutf8 */ | |
/**
 * Best-effort conversion between UTF-8 and Windows-1252 / ISO-8859-1.
 *
 * Included from: https://github.com/neitanod/forceutf8
 *
 * All public methods are static. Methods that take text accept either a string
 * or an array (converted element by element, recursively); any other value is
 * passed through as documented on each method.
 */
class Encoding {

    /** iconv option: transliterate characters that have no target representation. */
    const ICONV_TRANSLIT = "TRANSLIT";
    /** iconv option: silently drop characters that have no target representation. */
    const ICONV_IGNORE = "IGNORE";
    /** Do not use iconv; use the built-in decoder of this class instead. */
    const WITHOUT_ICONV = "";

    /** Windows-1252 byte value (0x80-0x9F range) => UTF-8 encoding of the character it represents. */
    protected static $win1252ToUtf8 = array(
        128 => "\xe2\x82\xac",
        130 => "\xe2\x80\x9a",
        131 => "\xc6\x92",
        132 => "\xe2\x80\x9e",
        133 => "\xe2\x80\xa6",
        134 => "\xe2\x80\xa0",
        135 => "\xe2\x80\xa1",
        136 => "\xcb\x86",
        137 => "\xe2\x80\xb0",
        138 => "\xc5\xa0",
        139 => "\xe2\x80\xb9",
        140 => "\xc5\x92",
        142 => "\xc5\xbd",
        145 => "\xe2\x80\x98",
        146 => "\xe2\x80\x99",
        147 => "\xe2\x80\x9c",
        148 => "\xe2\x80\x9d",
        149 => "\xe2\x80\xa2",
        150 => "\xe2\x80\x93",
        151 => "\xe2\x80\x94",
        152 => "\xcb\x9c",
        153 => "\xe2\x84\xa2",
        154 => "\xc5\xa1",
        155 => "\xe2\x80\xba",
        156 => "\xc5\x93",
        158 => "\xc5\xbe",
        159 => "\xc5\xb8"
    );

    /** UTF-8 encoded C1 control (what a Windows-1252 byte becomes when treated as ISO-8859-1) => intended UTF-8. */
    protected static $brokenUtf8ToUtf8 = array(
        "\xc2\x80" => "\xe2\x82\xac",
        "\xc2\x82" => "\xe2\x80\x9a",
        "\xc2\x83" => "\xc6\x92",
        "\xc2\x84" => "\xe2\x80\x9e",
        "\xc2\x85" => "\xe2\x80\xa6",
        "\xc2\x86" => "\xe2\x80\xa0",
        "\xc2\x87" => "\xe2\x80\xa1",
        "\xc2\x88" => "\xcb\x86",
        "\xc2\x89" => "\xe2\x80\xb0",
        "\xc2\x8a" => "\xc5\xa0",
        "\xc2\x8b" => "\xe2\x80\xb9",
        "\xc2\x8c" => "\xc5\x92",
        "\xc2\x8e" => "\xc5\xbd",
        "\xc2\x91" => "\xe2\x80\x98",
        "\xc2\x92" => "\xe2\x80\x99",
        "\xc2\x93" => "\xe2\x80\x9c",
        "\xc2\x94" => "\xe2\x80\x9d",
        "\xc2\x95" => "\xe2\x80\xa2",
        "\xc2\x96" => "\xe2\x80\x93",
        "\xc2\x97" => "\xe2\x80\x94",
        "\xc2\x98" => "\xcb\x9c",
        "\xc2\x99" => "\xe2\x84\xa2",
        "\xc2\x9a" => "\xc5\xa1",
        "\xc2\x9b" => "\xe2\x80\xba",
        "\xc2\x9c" => "\xc5\x93",
        "\xc2\x9e" => "\xc5\xbe",
        "\xc2\x9f" => "\xc5\xb8"
    );

    /** UTF-8 sequence => single Windows-1252 byte, for the characters Windows-1252 places in 0x80-0x9F. */
    protected static $utf8ToWin1252 = array(
        "\xe2\x82\xac" => "\x80",
        "\xe2\x80\x9a" => "\x82",
        "\xc6\x92"     => "\x83",
        "\xe2\x80\x9e" => "\x84",
        "\xe2\x80\xa6" => "\x85",
        "\xe2\x80\xa0" => "\x86",
        "\xe2\x80\xa1" => "\x87",
        "\xcb\x86"     => "\x88",
        "\xe2\x80\xb0" => "\x89",
        "\xc5\xa0"     => "\x8a",
        "\xe2\x80\xb9" => "\x8b",
        "\xc5\x92"     => "\x8c",
        "\xc5\xbd"     => "\x8e",
        "\xe2\x80\x98" => "\x91",
        "\xe2\x80\x99" => "\x92",
        "\xe2\x80\x9c" => "\x93",
        "\xe2\x80\x9d" => "\x94",
        "\xe2\x80\xa2" => "\x95",
        "\xe2\x80\x93" => "\x96",
        "\xe2\x80\x94" => "\x97",
        "\xcb\x9c"     => "\x98",
        "\xe2\x84\xa2" => "\x99",
        "\xc5\xa1"     => "\x9a",
        "\xe2\x80\xba" => "\x9b",
        "\xc5\x93"     => "\x9c",
        "\xc5\xbe"     => "\x9e",
        "\xc5\xb8"     => "\x9f"
    );

    /**
     * Leaves byte sequences that are shaped like UTF-8 untouched and converts every
     * other high byte to UTF-8, treating it as Windows-1252 / ISO-8859-1.
     *
     * Because detection is purely structural, some genuine Latin-1 input is
     * indistinguishable from UTF-8 and is therefore left unchanged:
     *  1) a byte 0xC0-0xDF followed by one byte in 0x80-0xBF
     *     (e.g. "\xC9\xBB", Latin-1 for "É»", is also a well-formed 2-byte sequence);
     *  2) a byte 0xE0-0xEF followed by two bytes in 0x80-0xBF;
     *  3) a byte 0xF0-0xF7 followed by three bytes in 0x80-0xBF.
     *
     * @param mixed $text String, or array of values to convert recursively.
     * @return mixed The converted string/array; non-string, non-array values are returned unchanged.
     */
    public static function toUTF8($text)
    {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::toUTF8($v);
            }
            return $text;
        }

        if (!is_string($text)) {
            return $text;
        }

        $max = self::strlen($text);
        $buf = "";

        for ($i = 0; $i < $max; $i++) {
            $c1 = $text[$i];
            $b1 = ord($c1);

            if ($b1 < 0x80) {
                // Plain ASCII never needs conversion.
                $buf .= $c1;
                continue;
            }

            if ($b1 < 0xc0) {
                // A lone 0x80-0xBF byte cannot start a UTF-8 sequence: treat it as
                // Windows-1252 (special table first, Latin-1 otherwise).
                $buf .= isset(self::$win1252ToUtf8[$b1])
                    ? self::$win1252ToUtf8[$b1]
                    : self::latin1ByteToUtf8($b1);
                continue;
            }

            // Number of continuation bytes a UTF-8 lead byte of this value announces.
            // 0xF8-0xFF are never valid lead bytes, so they are always converted.
            if ($b1 <= 0xdf) {
                $expected = 1;
            } elseif ($b1 <= 0xef) {
                $expected = 2;
            } elseif ($b1 <= 0xf7) {
                $expected = 3;
            } else {
                $expected = 0;
            }

            if ($expected > 0 && self::hasContinuationBytes($text, $i + 1, $expected, $max)) {
                // Almost certainly UTF-8 already: copy the whole sequence verbatim.
                for ($j = 0; $j <= $expected; $j++) {
                    $buf .= $text[$i + $j];
                }
                $i += $expected;
            } else {
                // Not a well-formed sequence: convert only the lead byte as Latin-1;
                // the following bytes are examined on their own in later iterations.
                $buf .= self::latin1ByteToUtf8($b1);
            }
        }

        return $buf;
    }

    /**
     * Converts UTF-8 (or mixed) text to Windows-1252.
     *
     * @param mixed  $text   String, or array of values to convert recursively.
     * @param string $option One of the ICONV_* / WITHOUT_ICONV constants.
     * @return mixed Converted string/array; other values are returned unchanged.
     */
    public static function toWin1252($text, $option = self::WITHOUT_ICONV)
    {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::toWin1252($v, $option);
            }
            return $text;
        }

        if (is_string($text)) {
            return static::utf8_decode($text, $option);
        }

        return $text;
    }

    /** Alias of toWin1252() using the default (no iconv) option. */
    public static function toISO8859($text)
    {
        return self::toWin1252($text);
    }

    /** Alias of toWin1252() using the default (no iconv) option. */
    public static function toLatin1($text)
    {
        return self::toWin1252($text);
    }

    /**
     * Repairs text that was UTF-8 encoded more than once by repeatedly decoding
     * and re-encoding it until the result stops changing.
     *
     * @param mixed  $text   String, or array of values to fix recursively.
     * @param string $option One of the ICONV_* / WITHOUT_ICONV constants.
     * @return mixed The repaired string/array.
     */
    public static function fixUTF8($text, $option = self::WITHOUT_ICONV)
    {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::fixUTF8($v, $option);
            }
            return $text;
        }

        $last = "";
        // Strict comparison: a loose one treats numeric-looking strings such as
        // "1e3" and "1000" as equal and could stop the loop on different text.
        while ($last !== $text) {
            $last = $text;
            $text = self::toUTF8(static::utf8_decode($text, $option));
        }
        $text = self::toUTF8(static::utf8_decode($text, $option));

        return $text;
    }

    /**
     * Fixes UTF-8 text that was produced from Windows-1252 input as if it were
     * ISO-8859-1 (i.e. bytes 0x80-0x9F became C1 control characters).
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * @param string|array $text
     * @return string|array
     */
    public static function UTF8FixWin1252Chars($text)
    {
        return str_replace(array_keys(self::$brokenUtf8ToUtf8), array_values(self::$brokenUtf8ToUtf8), $text);
    }

    /**
     * Strips a leading UTF-8 byte order mark (EF BB BF) if present.
     *
     * @param string $str
     * @return string
     */
    public static function removeBOM($str = "")
    {
        if (substr($str, 0, 3) === "\xef\xbb\xbf") {
            $str = substr($str, 3);
        }
        return $str;
    }

    /**
     * Byte length of $text, even when mbstring function overloading replaces strlen().
     */
    protected static function strlen($text)
    {
        $overloaded = function_exists('mb_strlen') && (((int) ini_get('mbstring.func_overload')) & 2);
        return $overloaded ? mb_strlen($text, '8bit') : strlen($text);
    }

    /**
     * Maps a loosely written encoding label to 'ISO-8859-1' or 'UTF-8'.
     * Unknown labels fall back to 'UTF-8'.
     *
     * @param string $encodingLabel e.g. "latin1", "utf-8", "Windows-1252".
     * @return string 'ISO-8859-1' or 'UTF-8'.
     */
    public static function normalizeEncoding($encodingLabel)
    {
        $encoding = strtoupper($encodingLabel);
        $encoding = preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);
        $equivalences = array(
            'ISO88591'    => 'ISO-8859-1',
            'ISO8859'     => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'WIN1252'     => 'ISO-8859-1',
            'WINDOWS1252' => 'ISO-8859-1'
        );

        if (empty($equivalences[$encoding])) {
            return 'UTF-8';
        }

        return $equivalences[$encoding];
    }

    /**
     * Converts $text to the encoding named by $encodingLabel (see normalizeEncoding()).
     *
     * @param string $encodingLabel
     * @param mixed  $text
     * @return mixed
     */
    public static function encode($encodingLabel, $text)
    {
        $encodingLabel = self::normalizeEncoding($encodingLabel);
        if ($encodingLabel == 'ISO-8859-1') {
            return self::toLatin1($text);
        }
        return self::toUTF8($text);
    }

    /**
     * Decodes UTF-8 text to Windows-1252, through iconv when requested and available.
     *
     * The non-iconv path does not call PHP's utf8_decode(): that function is
     * deprecated and replaces invalid UTF-8 bytes with "?", which destroyed the
     * raw Windows-1252 bytes the previous implementation injected before calling it.
     *
     * @param mixed  $text
     * @param string $option One of the ICONV_* / WITHOUT_ICONV constants.
     * @return string|false Decoded text (iconv may return false on failure).
     */
    protected static function utf8_decode($text, $option)
    {
        // Loose comparison kept on purpose so callers passing null/false still
        // select the non-iconv path.
        if ($option == self::WITHOUT_ICONV || !function_exists('iconv')) {
            return self::decodeUtf8ToWin1252((string) self::toUTF8($text));
        }

        if ($option == self::ICONV_TRANSLIT) {
            $suffix = '//TRANSLIT';
        } elseif ($option == self::ICONV_IGNORE) {
            $suffix = '//IGNORE';
        } else {
            $suffix = '';
        }

        return iconv("UTF-8", "Windows-1252" . $suffix, $text);
    }

    /**
     * UTF-8 encoding (always two bytes) of a single Latin-1 byte in 0x80-0xFF.
     *
     * @param int $byte
     * @return string
     */
    protected static function latin1ByteToUtf8($byte)
    {
        return chr(0xc0 | ($byte >> 6)) . chr(0x80 | ($byte & 0x3f));
    }

    /**
     * True when $count bytes starting at $offset all exist and lie in 0x80-0xBF.
     *
     * @param string $text
     * @param int    $offset First byte to inspect.
     * @param int    $count  Number of bytes to inspect.
     * @param int    $max    Byte length of $text.
     * @return bool
     */
    protected static function hasContinuationBytes($text, $offset, $count, $max)
    {
        if ($offset + $count > $max) {
            return false;
        }
        for ($k = 0; $k < $count; $k++) {
            $byte = ord($text[$offset + $k]);
            if ($byte < 0x80 || $byte > 0xbf) {
                return false;
            }
        }
        return true;
    }

    /**
     * Decodes a UTF-8 string to Windows-1252 bytes.
     *
     * Characters in the Windows-1252 special table become their 0x80-0x9F byte,
     * U+0080-U+00FF become the byte of the same value, and anything else that is
     * not ASCII (unmappable characters, malformed sequences) becomes "?".
     *
     * @param string $utf8
     * @return string
     */
    protected static function decodeUtf8ToWin1252($utf8)
    {
        $length = self::strlen($utf8);
        $out = "";
        $i = 0;

        while ($i < $length) {
            $lead = ord($utf8[$i]);

            if ($lead < 0x80) {
                $out .= $utf8[$i];
                $i++;
                continue;
            }

            // Sequence length announced by the lead byte (1 = stray continuation byte).
            if ($lead >= 0xf0) {
                $size = 4;
            } elseif ($lead >= 0xe0) {
                $size = 3;
            } elseif ($lead >= 0xc0) {
                $size = 2;
            } else {
                $size = 1;
            }

            // Gather the continuation bytes that are actually present.
            $sequence = $utf8[$i];
            $taken = 1;
            while ($taken < $size && $i + $taken < $length) {
                $next = ord($utf8[$i + $taken]);
                if ($next < 0x80 || $next > 0xbf) {
                    break;
                }
                $sequence .= $utf8[$i + $taken];
                $taken++;
            }
            $i += $taken;

            $complete = ($size > 1 && $taken === $size);

            if ($complete && isset(self::$utf8ToWin1252[$sequence])) {
                $out .= self::$utf8ToWin1252[$sequence];
            } elseif ($complete && $size === 2 && ($lead === 0xc2 || $lead === 0xc3)) {
                // U+0080-U+00FF: the code point equals the Latin-1 byte value.
                $out .= chr((($lead & 0x03) << 6) | (ord($sequence[1]) & 0x3f));
            } else {
                $out .= '?';
            }
        }

        return $out;
    }
}
/**
 * Downloads $url with cURL and returns the response body.
 *
 * The URL ultimately comes from the request (or from markup of the fetched
 * page), so transfers are limited to http/https, including on redirects, to
 * keep schemes such as file:// or gopher:// out of reach.
 * NOTE(review): this does not stop requests to internal http(s) hosts; add an
 * allow-list if this script is reachable by untrusted users.
 *
 * @param string $url Absolute URL to fetch.
 * @return string|false Response body, or false when the transfer failed
 *                      (including HTTP status >= 400, see CURLOPT_FAILONERROR).
 */
function get_data($url) {
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    $userAgent = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322)';

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
    curl_setopt($ch, CURLOPT_FAILONERROR, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_MAXREDIRS, 5); // do not follow redirect chains forever
    curl_setopt($ch, CURLOPT_AUTOREFERER, true);
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
    // Only web protocols, both for the initial request and for any redirect target.
    curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
    curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);

    $data = curl_exec($ch);
    curl_close($ch);

    return $data;
}
// ---------------------------------------------------------------------------
// Request handling: fetch the page given in ?url=, collect its external
// <script src="..."> files, download each one and count which jQuery methods
// are called on $(...) / jQuery(...) results.
//
// Variables consumed by the HTML template below:
//   $message    - status / error text shown next to the form
//   $scripts    - list of array('src' => as written in the page, 'url' => absolute URL, 'stat' => method => count)
//   $globalstat - method => count over all scripts
//   $host       - host name of the analysed page (only set for a valid URL)
// ---------------------------------------------------------------------------
$message = '';
$scripts = array();
$globalstat = array();
$hosts = array();
// Script hosts that are never downloaded (ad networks, trackers, CDNs).
$host_excludes = array(
    'ad.adverticum.net', 'audit.median.hu', 'ajax.googleapis.com', 'html5shiv.googlecode.com', 'www.googleadservices.com', 'pagead2.googlesyndication.com'
);

if (isset($_GET['url'])) {
    $parsed = filter_var($_GET['url'], FILTER_VALIDATE_URL) !== false ? parse_url($_GET['url']) : false;
    $scheme = (is_array($parsed) && isset($parsed['scheme'])) ? strtolower($parsed['scheme']) : '';

    // FILTER_VALIDATE_URL also accepts schemes such as file: or mailto:, so the
    // scheme and the presence of a host are checked explicitly.
    if (is_array($parsed) && isset($parsed['host']) && in_array($scheme, array('http', 'https'), true)) {
        $host = $parsed['host'];
        $hosts = array();

        $content = Encoding::toUTF8(get_data($_GET['url']));
        if (!is_string($content)) {
            $content = ''; // download failed; handled below as "No scripts found"
        }

        $script_strings = array();
        preg_match_all("/<script[^>]*?.*?<\/script>/siu", $content, $script_strings);

        if (!empty($script_strings[0])) {
            foreach ($script_strings[0] as $line) {
                // Inline scripts have no src attribute and are skipped.
                if (!preg_match('/src="([^"]+)"/', $line, $match)) {
                    continue;
                }
                $src = $match[1];

                // TODO: check whether the host matches; external scripts probably
                // should not be fetched. Separate input for CDNs?
                $srcParts = parse_url($src);
                $scripthost = (is_array($srcParts) && isset($srcParts['host'])) ? $srcParts['host'] : null;

                if (in_array($scripthost, $host_excludes, true)) {
                    continue;
                }
                $hosts[] = $scripthost;

                if ($scripthost === null) {
                    // Relative reference: resolve against the root of the analysed site.
                    $script = $scheme . "://" . $host . (substr($src, 0, 1) != '/' ? '/' : '') . $src;
                } elseif (substr($src, 0, 2) === '//') {
                    // Protocol-relative reference: cURL needs an explicit scheme.
                    $script = $scheme . ':' . $src;
                } else {
                    $script = $src;
                }

                $scripts[] = array('src' => $src, 'url' => $script);
            }
            $hosts = array_unique($hosts);
        }

        if (!count($scripts)) {
            $message = 'No scripts found';
        }

        foreach ($scripts as $i => $script) {
            $stat = array();
            $matches = array();
            $scriptsrc = get_data($script['url']);

            // "$(" or "jQuery(" followed by at least three characters, ")." and a
            // method name. The original "[$|jQuery]" was a character class that
            // matched any single one of those letters (e.g. the "e" in "parse(").
            // NOTE(review): ".+" is greedy and does not cross newlines, so at most
            // one call chain is counted per source line.
            if (is_string($scriptsrc)
                && preg_match_all('/(?:\$|jQuery)\(...+\)\.([a-zA-Z]+)/i', $scriptsrc, $matches)) {
                foreach ($matches[1] as $method) {
                    $stat[$method] = isset($stat[$method]) ? $stat[$method] + 1 : 1;
                    $globalstat[$method] = isset($globalstat[$method]) ? $globalstat[$method] + 1 : 1;
                }
            }

            arsort($stat);
            $scripts[$i]['stat'] = $stat;
        }
    } else {
        $message = 'Invalid url!';
    }
}
?> | |
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>jQuery stat</title>
<link rel="stylesheet" href="http://yui.yahooapis.com/pure/0.6.0/pure-min.css">
<script>function toggletb(e){el=document.getElementById("tb_"+e),el.style.display="none"!=el.style.display?"none":""}</script>
<style>a{font-weight:400;text-decoration:none;color:#333}a:hover{color:#ec7703}</style>
</head>
<body>
<div style="max-width: 900px; margin: 20px auto">
<h1>jQuery method usage statistic</h1>
<form method="GET" action="" class="pure-form">
<?php
// Everything echoed below that originates from the request or from downloaded
// markup is HTML-escaped; the submitted URL may be absent or not a string.
$submittedUrl = (isset($_GET['url']) && is_string($_GET['url'])) ? $_GET['url'] : '';
?>
<input type="text" class="pure-input-2-3" name="url" placeholder="site url (begin with http://)" value="<?php echo htmlspecialchars($submittedUrl, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'); ?>">
<input type="submit" class="pure-button pure-button-primary">
<?php echo htmlspecialchars($message, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'); ?>
</form>
<?php
// The first table shows the totals over all scripts; it has no URL of its own.
arsort($globalstat);
array_unshift($scripts, array('src' => 'All scripts', 'url' => '', 'stat' => $globalstat));
foreach($scripts as $i => $script){
    if(empty($script['stat'])) continue;
    $label = htmlspecialchars($script['src'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    // Only http(s) targets become links, so a crafted src can never yield a javascript: href.
    $link = (isset($script['url']) && preg_match('#^https?://#i', $script['url']))
        ? htmlspecialchars($script['url'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
        : '';
?>
<table class="pure-table pure-table-horizontal" style="width: 100%; margin-top: 20px;">
<thead>
<tr>
<th colspan="2"><?php if($link !== ''){ ?><a href="<?php echo $link; ?>" target="_blank"><?php echo $label; ?></a><?php } else { echo $label; } ?></th>
<th style="width: 60px; text-align: center;"><a href="javascript:void(0)" onclick="toggletb(<?php echo (int) $i; ?>)">[toggle]</a></th>
</tr>
</thead>
<tbody id="tb_<?php echo (int) $i; ?>" <?php echo $i > 0 ? 'style="display: none;"' : '' ?>>
<?php
    // Bar width is the method's share of all calls counted for this script.
    $all = array_sum($script['stat']);
    foreach($script['stat'] as $f => $c){
        echo '<tr><td style="width: 200px;">$.'.htmlspecialchars($f, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8').'()</td>';
        echo '<td><div style="background: #ec7703;width:'. ($c/$all*100) .'%;height: 20px;"></div></td>';
        echo '<td style="text-align: center;">'.(int) $c.'</td></tr>';
    }
?>
</tbody>
</table>
<?php }
if(count($globalstat)){
?>
<br /><br /><b>php stat</b> <a href="javascript:void(0)" onclick="toggletb(9999)">[toggle]</a><br /><br />
<textarea id="tb_9999" onfocus="this.select();" onmouseup="return false;" style="display: none;width:100%; height: 300px;"><?php
    // Copy-and-paste PHP snippet with the serialized totals; escaped so the
    // quotes produced by serialize() cannot break out of the textarea.
    echo htmlspecialchars("\$globalstats['". $host ."'] = '".serialize($globalstat)."';\n", ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
?></textarea>
<?php
}
?>
</div>
</body>
</html>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment