Skip to content

Instantly share code, notes, and snippets.

@zenloner
Created June 14, 2013 07:55
Show Gist options
  • Save zenloner/5780208 to your computer and use it in GitHub Desktop.
Save zenloner/5780208 to your computer and use it in GitHub Desktop.
<?php
//0. The function name, variable names and return value must not be changed
/**
 * Parse one access-log line into an associative array of named fields.
 *
 * The function name, the keys of the returned array and the return value
 * must not be changed.
 *
 * @param string $line Raw log line; a trailing line break is tolerated.
 * @return array Every key declared in step 1 is always present. When the line
 *               does not match the expected layout, all fields except
 *               '_OriginalLogLine' keep their defaults (null, or an empty
 *               array for '_UrlFields').
 */
function agent($line)
{
    // 1. Declare every field used by the statistics, whether or not a value can be matched.
    $res = array(
        '_OriginalLogLine'  => $line,
        '_UrlPre'           => null,
        '_UrlFields'        => array(),
        '_ClientIp'         => null,
        '_ClientIpInLong'   => null,
        '_AccessTimeHour'   => null,
        '_AccessTimeMinute' => null,
        '_AccessTimeSecond' => null,
        '_TimeZone'         => null,
        '_Method'           => null,
        '_Url'              => null,
        '_HttpVersion'      => null,
        '_StatusCode'       => null,
        '_Referer'          => null,
        '_Cookie'           => null,
        '_BaiduId'          => null,
        '_Site'             => null,
        '_UserAgent'        => null,
    );

    // Bytes logged as \xNN must be rewritten to %NN so that urldecode() can
    // restore them (needed to recover Chinese characters).
    $line = str_replace("\\x", "%", $line);
    // Drop the line break left by fgets(); a trailing "\r" would otherwise
    // sit between the closing quote and the "$" anchor and defeat the match.
    $line = rtrim($line, "\r\n");

    // 2. Apply the matching rule and copy the captured groups into the fields.
    $pattern = "/(\d+\.\d+\.\d+\.\d+) .*?\[.*?:(\d+):(\d+):(\d+) (.*?)\] \"(.*?) (.*?) .TTP\/(.*?)\" (.*?) .*?\"(.*?)\"(.*?) \"(.*)\"$/";
    if (!preg_match($pattern, $line, $out)) {
        // Unparseable line: hand back the declared defaults instead of
        // reading capture groups that do not exist.
        return $res;
    }

    $res['_ClientIp'] = $out[1];
    $res['_ClientIpInLong'] = ip2long($out[1]);
    $res['_AccessTimeHour'] = $out[2];
    $res['_AccessTimeMinute'] = $out[3];
    $res['_AccessTimeSecond'] = $out[4];
    $res['_TimeZone'] = $out[5];
    $res['_Method'] = $out[6];
    $res['_Url'] = $out[7];
    $res['_HttpVersion'] = $out[8];
    $res['_StatusCode'] = $out[9];
    $res['_Referer'] = $out[10];
    $res['_Cookie'] = $out[11];
    $res['_UserAgent'] = $out[12];

    $url = $res['_Url'];
    // strpos() yields false (no '?') or 0 (leading '?'); neither counts as a
    // query string here, exactly as in the previous "> 0" checks.
    $query_pos = strpos($url, '?');
    if ($query_pos > 0) {
        $res['_UrlPre'] = substr($url, 0, $query_pos);

        // Split the query string into its individual parameters.
        foreach (explode('&', substr($url, $query_pos + 1)) as $pair) {
            // Split on the first '=' only, so values that contain '=' stay intact.
            $parts = explode('=', $pair, 2);
            $name = $parts[0];
            // A parameter without '=' (e.g. "&flag") has an empty value.
            $value = isset($parts[1]) ? urldecode($parts[1]) : '';

            if ($value !== '' && mb_detect_encoding($value, "UTF-8,GBK") == 'UTF-8') {
                $converted = iconv('UTF-8', 'GBK', $value);
                // iconv() returns false when the text cannot be converted;
                // keep the decoded value rather than storing an empty one.
                if ($converted !== false) {
                    $value = $converted;
                }
            }

            // NOTE(review): the value is URL-decoded a second time here, as the
            // original code did; confirm whether logged values are really
            // double-encoded before removing this.
            $res['_UrlFields'][$name] = urldecode($value);
        }
    } else {
        $res['_UrlPre'] = $url;
    }

    // The BAIDUID value ends at the first ':' or space that follows it.
    if (preg_match("/BAIDUID=(.*?)[: ]/", $res['_Cookie'], $out)) {
        $res['_BaiduId'] = $out[1];
    }
    // Host name, available only when the request URL is absolute (http://host/...).
    if (preg_match("/http:\/\/([^\/]*)/", urldecode($res['_Url']), $out)) {
        $res['_Site'] = $out[1];
    }

    return $res;
}
/**
 * Print, for every URL query parameter found in a log file, the distinct
 * values that parameter took.
 *
 * Each line is parsed with agent() and its '_UrlFields' entries are collected
 * per parameter name. For every parameter the output is: the name, a newline,
 * each distinct value followed by a space, then a separator line.
 *
 * @param string $log_name Path of the log file to read.
 * @return void
 */
function read_log($log_name)
{
    $file_handle = fopen($log_name, "r");
    if ($file_handle === false) {
        // fopen() has already raised a warning; without a handle the read
        // loop below cannot run, so there is nothing to report.
        return;
    }

    // Parameter name => (distinct value => 1), in first-seen order.
    $parameter_dict = array();
    // Stop on the first failed read so the false returned at end of file is
    // never passed to agent().
    while (($line = fgets($file_handle)) !== false) {
        $result = agent($line);
        foreach ($result['_UrlFields'] as $name => $value) {
            // Re-assigning an existing key keeps its position, so this
            // records each distinct value once.
            $parameter_dict[$name][$value] = 1;
        }
    }
    fclose($file_handle);

    foreach ($parameter_dict as $para_key => $para_value) {
        echo $para_key, "\n";
        foreach ($para_value as $inner_key => $inner_value) {
            echo $inner_key, ' ';
        }
        echo "\n=================\n";
    }
}
/**
 * Script entry point: report the query-parameter values of one log file.
 *
 * @param string $log_file_name Path of the log to analyse; defaults to
 *                              "pid=322", the name that used to be hard-coded.
 * @return void
 */
function main($log_file_name = "pid=322")
{
    read_log($log_file_name);
}
// Run the analysis as soon as the script is executed.
main();
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment