ts-3156 · December 16, 2011 09:49
diff --git a/charcount.php b/charcount.php
 <!doctype html>
 <html lang="ja">
 <head>
 <meta charset="UTF-8">
 <title>文字登場回数カウンタ</title>
 <style>
 body{
 	background-color:#eee;
 	color:#555;
 }
 th{
 	padding:0.5em;
 	text-align:center;
 	background-color:#aaa;
 	color:#fff;
 }
 td{
 	padding:0.5em;
 	background-color:#fff;
 	text-align:right;
 }
 </style>
 </head>
 <body>
 <?php
 //■ Settings

 // Processing time limit:
 // counting takes a while, so relax PHP's execution time limit to 10 minutes.
 set_time_limit(600);

 // Character encoding settings
 mb_internal_encoding("UTF-8");// normally there is no need to change this
 $file_encoding = "UTF-8";// encoding of the files that will be scanned

 // Directory to scan
 // (alternative used for testing)
 //$dir = "test";
 $dir = "twitter4j";

 //■ End of settings

 // $chars[character] = number of times that character occurred
 $chars = array();

 print "<h1>文字登場回数カウンタ</h1>\n";

 // List every processed file; countCharsInDir() prints one <li> per file
 // and fills $chars as a side effect.
 print "<h2>処理対象</h2>\n";
 print "<ol>";
 countCharsInDir($dir);
 print "</ol>";

 print "<h2>処理結果</h2>\n";
 print "<table>\n";
 // Both header cells are <th>, so both must be closed with </th>.
 print "<tr><th scope=\"col\">文字</th><th scope=\"col\">登場回数</th></tr>\n";

 // Sort by occurrence count, descending, while keeping the character keys.
 arsort($chars);

 // Whitespace characters are invisible inside a table cell, so a label is shown instead.
 $labels = array(
 	"\n" => "（改行）",
 	"\t" => "（タブ文字）",
 	" " => "（半角スペース）",
 	"　" => "（全角スペース）",
 );

 $count1 = 0;// number of distinct characters
 $count2 = 0;// total number of characters
 foreach($chars as $key => $value){
 	// PHP turns digit keys such as "1" into integers; normalise back to a string.
 	$key = (string)$key;
 	if(isset($labels[$key])){
 		$cell = $labels[$key];
 	}else{
 		// Scanned files contain characters such as < > & that must not be
 		// emitted as raw markup.
 		$cell = htmlspecialchars($key, ENT_QUOTES | ENT_SUBSTITUTE, "UTF-8");
 	}
 	print "<tr><td scope=\"row\">$cell</td><td>$value</td></tr>\n";
 	$count1++;
 	$count2 += $value;
 }
 print "</table>";
 print "<p>以上 $count2 文字、$count1 種類</p>\n";




 /**
  * Recursively counts how often each character occurs in the files below $dir.
  *
  * Counts are accumulated into the global $chars array (character => count).
  * File contents are converted to UTF-8 from the encoding named by the global
  * $file_encoding, CRLF and lone CR are normalised to LF, and the UTF-8 BOM
  * character is not counted. One <li> element is printed per processed file
  * (or per failure).
  *
  * @param string $dir Directory to scan; sub-directories are scanned recursively.
  * @return void
  */
 function countCharsInDir($dir){
 	// Open the directory
 	$handle = opendir($dir);
 	if ($handle === false){
 		print "<li>エラー：ディレクトリのオープンに失敗しました！（" . htmlspecialchars($dir, ENT_QUOTES | ENT_SUBSTITUTE, "UTF-8") . "）</li>";
 		return;
 	}

 	// Visit the entries one by one and count the characters of every file
 	while (false !== ($file = readdir($handle))) {
 		// Skip the self/parent entries, descend into real sub-directories
 		if ($file === "." || $file === "..") continue;
 		$path = "$dir/$file";
 		if (is_dir($path)){
 			countCharsInDir($path);
 			continue;
 		}

 		// File names may contain & or <, so escape before echoing into HTML
 		$safePath = htmlspecialchars($path, ENT_QUOTES | ENT_SUBSTITUTE, "UTF-8");

 		// Read the file; report unreadable files instead of counting nothing silently
 		$str = file_get_contents($path);
 		if ($str === false){
 			print "<li>エラー：ファイルの読み込みに失敗しました！（$safePath）</li>";
 			continue;
 		}
 		print "<li>$safePath</li>";

 		// Convert to UTF-8
 		$str = mb_convert_encoding($str, "UTF-8", $GLOBALS['file_encoding']);

 		// Replacements run in order: CRLF becomes LF first, then any remaining CR becomes LF
 		$str = str_replace(array("\r\n", "\r"), "\n", $str);

 		// Split into characters in a single pass. Calling mb_substr() once per
 		// index rescans the string from the start each time, which is quadratic.
 		$pieces = preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);
 		if ($pieces === false){
 			// PCRE refused the input; fall back to the slow per-index extraction
 			$pieces = array();
 			$length = mb_strlen($str);
 			for ($i = 0; $i < $length; $i++){
 				$pieces[] = mb_substr($str, $i, 1);
 			}
 		}

 		// Merge this file's tallies into the global counter
 		foreach (array_count_values($pieces) as $char => $count){
 			// Exclude the UTF-8 BOM (U+FEFF) from the statistics
 			if ($char === "\xEF\xBB\xBF") continue;

 			// isset() avoids reading an undefined key the first time a character appears
 			if (isset($GLOBALS["chars"][$char])) $GLOBALS["chars"][$char] += $count;
 			else $GLOBALS["chars"][$char] = $count;
 		}
 	}
 	closedir($handle);
 }


 ?>



 </body>
 </html>
	<!doctype html>
	<html lang="ja">
	<head>
	<meta charset="UTF-8">
	<title>文字登場回数カウンタ</title>
	<style>
	body{
	background-color:#eee;
	color:#555;
	}
	th{
	padding:0.5em;
	text-align:center;
	background-color:#aaa;
	color:#fff;
	}
	td{
	padding:0.5em;
	background-color:#fff;
	text-align:right;
	}
	</style>
	</head>
	<body>
	<?php
	//■ Settings

	// Processing time limit:
	// counting takes a while, so relax PHP's execution time limit to 10 minutes.
	set_time_limit(600);

	// Character encoding settings
	mb_internal_encoding("UTF-8");// normally there is no need to change this
	$file_encoding = "UTF-8";// encoding of the files that will be scanned

	// Directory to scan
	// (alternative used for testing)
	//$dir = "test";
	$dir = "twitter4j";

	//■ End of settings

	// $chars[character] = number of times that character occurred
	$chars = array();

	print "<h1>文字登場回数カウンタ</h1>\n";

	// List every processed file; countCharsInDir() prints one <li> per file
	// and fills $chars as a side effect.
	print "<h2>処理対象</h2>\n";
	print "<ol>";
	countCharsInDir($dir);
	print "</ol>";

	print "<h2>処理結果</h2>\n";
	print "<table>\n";
	// Both header cells are <th>, so both must be closed with </th>.
	print "<tr><th scope=\"col\">文字</th><th scope=\"col\">登場回数</th></tr>\n";

	// Sort by occurrence count, descending, while keeping the character keys.
	arsort($chars);

	// Whitespace characters are invisible inside a table cell, so a label is shown instead.
	$labels = array(
		"\n" => "（改行）",
		"\t" => "（タブ文字）",
		" " => "（半角スペース）",
		"　" => "（全角スペース）",
	);

	$count1 = 0;// number of distinct characters
	$count2 = 0;// total number of characters
	foreach($chars as $key => $value){
		// PHP turns digit keys such as "1" into integers; normalise back to a string.
		$key = (string)$key;
		if(isset($labels[$key])){
			$cell = $labels[$key];
		}else{
			// Scanned files contain characters such as < > & that must not be
			// emitted as raw markup.
			$cell = htmlspecialchars($key, ENT_QUOTES | ENT_SUBSTITUTE, "UTF-8");
		}
		print "<tr><td scope=\"row\">$cell</td><td>$value</td></tr>\n";
		$count1++;
		$count2 += $value;
	}
	print "</table>";
	print "<p>以上 $count2 文字、$count1 種類</p>\n";




	/**
	 * Recursively counts how often each character occurs in the files below $dir.
	 *
	 * Counts are accumulated into the global $chars array (character => count).
	 * File contents are converted to UTF-8 from the encoding named by the global
	 * $file_encoding, CRLF and lone CR are normalised to LF, and the UTF-8 BOM
	 * character is not counted. One <li> element is printed per processed file
	 * (or per failure).
	 *
	 * @param string $dir Directory to scan; sub-directories are scanned recursively.
	 * @return void
	 */
	function countCharsInDir($dir){
		// Open the directory
		$handle = opendir($dir);
		if ($handle === false){
			print "<li>エラー：ディレクトリのオープンに失敗しました！（" . htmlspecialchars($dir, ENT_QUOTES | ENT_SUBSTITUTE, "UTF-8") . "）</li>";
			return;
		}

		// Visit the entries one by one and count the characters of every file
		while (false !== ($file = readdir($handle))) {
			// Skip the self/parent entries, descend into real sub-directories
			if ($file === "." || $file === "..") continue;
			$path = "$dir/$file";
			if (is_dir($path)){
				countCharsInDir($path);
				continue;
			}

			// File names may contain & or <, so escape before echoing into HTML
			$safePath = htmlspecialchars($path, ENT_QUOTES | ENT_SUBSTITUTE, "UTF-8");

			// Read the file; report unreadable files instead of counting nothing silently
			$str = file_get_contents($path);
			if ($str === false){
				print "<li>エラー：ファイルの読み込みに失敗しました！（$safePath）</li>";
				continue;
			}
			print "<li>$safePath</li>";

			// Convert to UTF-8
			$str = mb_convert_encoding($str, "UTF-8", $GLOBALS['file_encoding']);

			// Replacements run in order: CRLF becomes LF first, then any remaining CR becomes LF
			$str = str_replace(array("\r\n", "\r"), "\n", $str);

			// Split into characters in a single pass. Calling mb_substr() once per
			// index rescans the string from the start each time, which is quadratic.
			$pieces = preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);
			if ($pieces === false){
				// PCRE refused the input; fall back to the slow per-index extraction
				$pieces = array();
				$length = mb_strlen($str);
				for ($i = 0; $i < $length; $i++){
					$pieces[] = mb_substr($str, $i, 1);
				}
			}

			// Merge this file's tallies into the global counter
			foreach (array_count_values($pieces) as $char => $count){
				// Exclude the UTF-8 BOM (U+FEFF) from the statistics
				if ($char === "\xEF\xBB\xBF") continue;

				// isset() avoids reading an undefined key the first time a character appears
				if (isset($GLOBALS["chars"][$char])) $GLOBALS["chars"][$char] += $count;
				else $GLOBALS["chars"][$char] = $count;
			}
		}
		closedir($handle);
	}


	?>



	</body>
	</html>