Created
June 14, 2013 07:55
-
-
Save zenloner/5780208 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// 0. The function name, variable names, and return value must not be changed.
/**
 * Parse one access-log line into an associative array of named fields.
 *
 * Every key is always present in the result. When the line does not match
 * the expected layout, all keys except '_OriginalLogLine' keep their
 * defaults (null, or an empty array for '_UrlFields').
 *
 * NOTE(review): the pattern looks like an nginx/Apache "combined" style log
 * with an extra unquoted cookie column before the user agent - confirm
 * against the real log format.
 *
 * @param string $line Raw log line, optionally ending in a newline.
 * @return array Parsed fields keyed by name ('_ClientIp', '_Url',
 *               '_UrlFields', ...). '_UrlFields' maps each query-string
 *               parameter name to its decoded value.
 */
function agent($line)
{
    // 1. Declare every field used by the statistics up front, whether or
    //    not the line ends up providing a value for it.
    $res = array();
    $res['_OriginalLogLine'] = $line;
    // Bytes logged as "\xNN" are rewritten to "%NN" so that urldecode()
    // can later turn them back into (Chinese) characters.
    $line = str_replace("\\x", "%", (string) $line);
    $res['_UrlPre'] = null;
    $res['_UrlFields'] = array();
    $res['_ClientIp'] = null;
    $res['_ClientIpInLong'] = null;
    $res['_AccessTimeHour'] = null;
    $res['_AccessTimeMinute'] = null;
    $res['_AccessTimeSecond'] = null;
    $res['_TimeZone'] = null;
    $res['_Method'] = null;
    $res['_Url'] = null;
    $res['_HttpVersion'] = null;
    $res['_StatusCode'] = null;
    $res['_Referer'] = null;
    $res['_Cookie'] = null;
    $res['_BaiduId'] = null;
    $res['_Site'] = null;
    $res['_UserAgent'] = null;

    // 2. Apply the matching rule and copy the captures into the fields.
    //    A non-matching line returns the defaults instead of reading
    //    undefined capture offsets.
    if (!preg_match("/(\d+\.\d+\.\d+\.\d+) .*?\[.*?:(\d+):(\d+):(\d+) (.*?)\] \"(.*?) (.*?) .TTP\/(.*?)\" (.*?) .*?\"(.*?)\"(.*?) \"(.*)\"$/", $line, $out)) {
        return $res;
    }

    $res['_ClientIp'] = $out[1];
    $res['_ClientIpInLong'] = ip2long($out[1]);
    $res['_AccessTimeHour'] = $out[2];
    $res['_AccessTimeMinute'] = $out[3];
    $res['_AccessTimeSecond'] = $out[4];
    $res['_TimeZone'] = $out[5];
    $res['_Method'] = $out[6];
    $res['_Url'] = $out[7];
    $res['_HttpVersion'] = $out[8];
    $res['_StatusCode'] = $out[9];
    $res['_Referer'] = $out[10];
    $res['_Cookie'] = $out[11];
    $res['_UserAgent'] = $out[12];

    $url = $res['_Url'];
    $queryPos = strpos($url, '?');

    if ($queryPos !== false && $queryPos > 0) {
        // Everything before the first '?' is the path part.
        $res['_UrlPre'] = substr($url, 0, $queryPos);

        // Split the query string into its individual parameters.
        $query = (string) substr($url, $queryPos + 1);
        foreach (explode('&', $query) as $pair) {
            // Split on the first '=' only so values that contain '='
            // are kept whole; a parameter without '=' gets an empty value.
            $parts = explode('=', $pair, 2);
            $name = $parts[0];
            $value = urldecode(isset($parts[1]) ? $parts[1] : '');

            if (mb_detect_encoding($value, "UTF-8,GBK") == 'UTF-8') {
                $converted = iconv('UTF-8', 'GBK', $value);
                // Keep the decoded value when the conversion fails rather
                // than silently replacing it with an empty string.
                if ($converted !== false) {
                    $value = $converted;
                }
            }

            // NOTE(review): the value is URL-decoded a second time here, as
            // in the original code. This alters values that legitimately
            // contain '%' or '+' after the first decode - confirm whether
            // the logs really carry double-encoded values.
            $res['_UrlFields'][$name] = urldecode($value);
        }
    } else {
        $res['_UrlPre'] = $url;
    }

    // The id is the text after "BAIDUID=" up to the first ':' or space.
    if (preg_match("/BAIDUID=(.*?)[: ]/", $res['_Cookie'], $out)) {
        $res['_BaiduId'] = $out[1];
    }

    // Host name of the first "http://" URL found in the decoded request URL.
    if (preg_match("/http:\/\/([^\/]*)/", urldecode($res['_Url']), $out)) {
        $res['_Site'] = $out[1];
    }

    return $res;
}
/**
 * Read a log file and print, for every URL query parameter seen, the set of
 * distinct values it took.
 *
 * Each line is parsed with agent(); only its '_UrlFields' entry is used.
 * Output format: the parameter name on one line, then every distinct value
 * followed by a space, then a "=================" separator line.
 *
 * @param string $log_name Path of the log file to read.
 * @return void
 * @throws RuntimeException When the log file cannot be opened.
 */
function read_log($log_name)
{
    $file_handle = fopen($log_name, "r");
    if ($file_handle === false) {
        // Without this guard the read loop would spin on an invalid handle.
        throw new RuntimeException("Unable to open log file: " . $log_name);
    }

    // parameter name => (distinct value => 1), both in first-seen order.
    $parameter_dict = array();

    // Looping on the fgets() result avoids handing a trailing `false`
    // to agent() once the end of the file is reached.
    while (($line = fgets($file_handle)) !== false) {
        $result = agent($line);
        foreach ($result['_UrlFields'] as $field_name => $field_value) {
            // Re-assigning an existing key keeps its original position,
            // so this records each distinct value exactly once.
            $parameter_dict[$field_name][$field_value] = 1;
        }
    }

    // All data is in memory now; release the handle before printing.
    fclose($file_handle);

    foreach ($parameter_dict as $para_key => $para_value) {
        echo $para_key, "\n";
        foreach ($para_value as $inner_key => $inner_value) {
            echo $inner_key, ' ';
        }
        echo "\n=================\n";
    }
}
/**
 * Script entry point: summarise the URL parameters found in a log file.
 *
 * @param string $log_file_name Path of the log file to analyse; defaults to
 *                              the file name the script originally
 *                              hard-coded.
 * @return void
 */
function main($log_file_name = "pid=322")
{
    read_log($log_file_name);
}

main();
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment