Created
May 1, 2010 07:37
-
-
Save udonchan/386133 to your computer and use it in GitHub Desktop.
2年前に書いたはてブの記事をクラスタリングするコードのかけら
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| public ArrayList<Integer> | |
| execClusterAnalize(int clusterSize, int maxTrialFreq){ | |
| /* セントロイド */ | |
| double[][] center = new double[clusterSize][getTagsLength()]; | |
| double[][] previousCenter = new double[clusterSize][getTagsLength()]; | |
| /* 試行回数のカウンタ */ | |
| int trialFreq=0; | |
| /* 所属クラスタ(pagesと関連づけ) */ | |
| ArrayList<Integer> belongingCluster = | |
| new ArrayList<Integer>(); | |
| for(int i=0;i<getPagesLength();i++) | |
| belongingCluster.add(new Integer(0)); | |
| /* ページの集合の中からランダムに選んで初期セントロイドとして初期化 */ | |
| for(int i=0; i<center.length; i++) | |
| center[i]= | |
| termVector[new Random().nextInt(getPagesLength())].clone(); | |
| while(!center.equals(previousCenter)){ | |
| /* 所属クラスタの決定 */ | |
| for(int i=0; i<getPagesLength();i++){ | |
| double currentDistans=0; | |
| double minDistans=1; | |
| for(int j=0; j<clusterSize; j++){ | |
| currentDistans= | |
| getDistans(termVector[i], center[j]); | |
| if(currentDistans<minDistans){ | |
| minDistans = currentDistans; | |
| belongingCluster.set(i, new Integer(j)); | |
| } | |
| } | |
| } | |
| previousCenter=center.clone(); | |
| center = new double[clusterSize][getTagsLength()]; | |
| /* セントロイドの再計算 */ | |
| for(int i=0; i<clusterSize; i++){ | |
| int counter=0; | |
| for(int j=0; j<belongingCluster.size(); j++){ | |
| if(belongingCluster.get(j).equals(i)){ | |
| for(int k=0;k<getTagsLength();k++) | |
| center[i][k]=termVector[j][k]; | |
| counter++; | |
| } | |
| } | |
| for(int j=0; j<center[i].length; j++) | |
| center[i][j]/=counter; | |
| } | |
| if(trialFreq%10==0) | |
| System.out.println(trialFreq +"tried and continue"); | |
| /* 試行回数による終了処理 */ | |
| if(++trialFreq>=maxTrialFreq && maxTrialFreq!=0) | |
| break; | |
| } | |
| System.out.println(trialFreq +" tried"); | |
| return belongingCluster; | |
| } | |
| // 距離は0から1のdoubleで返す | |
| private double getDistans(double[] a, double[] b){ | |
| /* コサイン尺度で求める */ | |
| return 1-scalarProduct(a, b)/norm(a)*norm(b); | |
| } | |
| private double scalarProduct(double[] a, double[] b){ | |
| double res=0; | |
| for(int i=0; i<a.length; i++) | |
| res+=a[i]*b[i]; | |
| return res; | |
| } | |
| private double norm(double[] a){ | |
| double res=0; | |
| for(double val: a) | |
| res+=pow(val, 2); | |
| return sqrt(res); | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment