Created
May 20, 2013 01:19
-
-
Save anonymous/5609832 to your computer and use it in GitHub Desktop.
Clustering of Hofstede's cultural-dimensions research results using the SimpleKMeans implementation from Weka 3.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Malaysia 104 26 50 36 | |
Guatemala 95 6 37 101 | |
Panama 95 11 44 86 | |
Philippines 94 32 64 44 19 | |
Mexico 81 30 69 82 | |
Venezuela 81 12 73 76 | |
China 80 20 66 40 118 | |
Egypt 80 38 52 68 | |
Iraq 80 38 52 68 | |
Kuwait 80 38 52 68 | |
Lebanon 80 38 52 68 | |
Libya 80 38 52 68 | |
Saudi Arabia 80 38 52 68 | |
United Arab Emirates 80 38 52 68 | |
Ecuador 78 8 63 67 | |
Indonesia 78 14 46 48 | |
Ghana 77 20 46 54 16 | |
India 77 48 56 40 61 | |
Nigeria 77 20 46 54 16 | |
Sierra Leone 77 20 46 54 16 | |
Singapore 74 20 48 8 48 | |
Brazil 69 38 49 76 65 | |
France 68 71 43 86 | |
Hong Kong 68 25 57 29 96 | |
Poland 68 60 64 93 | |
Colombia 67 13 64 80 | |
El Salvador 66 19 40 94 | |
Turkey 66 37 45 85 | |
Belgium 65 75 54 94 | |
Ethiopia 64 27 41 52 25 | |
Kenya 64 27 41 52 25 | |
Peru 64 16 42 87 | |
Tanzania 64 27 41 52 25 | |
Thailand 64 20 34 64 56 | |
Zambia 64 27 41 52 25 | |
Chile 63 23 28 86 | |
Portugal 63 27 31 104 | |
Uruguay 61 36 38 100 | |
Greece 60 35 57 112 | |
South Korea 60 18 39 85 75 | |
Iran 58 41 43 59 | |
Taiwan 58 17 45 69 87 | |
Czech Republic 57 58 57 74 | |
Spain 57 51 42 86 | |
Pakistan 55 14 50 70 | |
Japan 54 46 95 92 80 | |
Italy 50 76 70 75 | |
Argentina 49 46 56 86 | |
South Africa 49 65 63 49 | |
Hungary 46 55 88 82 | |
Jamaica 45 39 68 13 | |
United States 40 91 62 46 29 | |
Netherlands 38 80 14 53 44 | |
Australia 36 90 61 51 31 | |
Costa Rica 35 15 21 86 | |
Germany 35 67 66 65 31 | |
United Kingdom 35 89 66 35 25 | |
Switzerland 34 68 70 58 | |
Finland 33 63 26 59 | |
Norway 31 69 8 50 20 | |
Sweden 31 71 5 29 33 | |
Ireland 28 70 68 35 | |
New Zealand 22 79 58 49 30 | |
Denmark 18 74 16 23 | |
Israel 13 54 47 81 | |
Austria 11 55 79 70 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package test; | |
import java.io.BufferedReader; | |
import java.io.InputStream; | |
import java.io.InputStreamReader; | |
import java.util.ArrayList; | |
import java.util.HashMap; | |
import java.util.List; | |
import java.util.Map; | |
import weka.clusterers.SimpleKMeans; | |
import weka.core.Attribute; | |
import weka.core.DenseInstance; | |
import weka.core.EuclideanDistance; | |
import weka.core.Instance; | |
import weka.core.Instances; | |
public class HofstedeWekaClusterer { | |
public static void main(String[] args) throws Exception { | |
Attribute pdi = new Attribute("PDI"); | |
Attribute idv = new Attribute("IDV"); | |
Attribute mas = new Attribute("MAS"); | |
Attribute uai = new Attribute("UAI"); | |
SimpleKMeans clusterer = new SimpleKMeans(); | |
clusterer.setInitializeUsingKMeansPlusPlusMethod(true); | |
clusterer.setDontReplaceMissingValues(true); | |
clusterer.setDistanceFunction(new EuclideanDistance()); | |
clusterer.setMaxIterations(16); | |
clusterer.setNumClusters(4); | |
clusterer.setPreserveInstancesOrder(true); | |
InputStream input = HofstedeWekaClusterer.class.getClassLoader().getResourceAsStream("data"); | |
BufferedReader reader = new BufferedReader(new InputStreamReader(input)); | |
List<String> countries = new ArrayList<String>(); | |
ArrayList<Attribute> attrs = new ArrayList<Attribute>(); | |
attrs.add(pdi); | |
attrs.add(idv); | |
attrs.add(mas); | |
attrs.add(uai); | |
Instances data = new Instances("cultureDimensions", attrs, 100); | |
String line = null; | |
while ((line = reader.readLine()) != null) { | |
String[] p = line.split("\t"); | |
countries.add(p[0]); | |
Instance instance = new DenseInstance(4); | |
setValue(instance, pdi, p[1]); | |
setValue(instance, idv, p[2]); | |
setValue(instance, mas, p[3]); | |
setValue(instance, uai, p[4]); | |
data.add(instance); | |
} | |
reader.close(); | |
clusterer.buildClusterer(data); | |
System.out.println(clusterer.toString()); | |
Map<Integer, List<String>> result = new HashMap<Integer, List<String>>(); | |
for (int i = 0; i < clusterer.getAssignments().length; i++) { | |
int assignment = clusterer.getAssignments()[i]; | |
if (!result.containsKey(assignment)) { | |
result.put(assignment, new ArrayList<String>()); | |
} | |
result.get(assignment).add(countries.get(i)); | |
} | |
for (Integer assignment : result.keySet()) { | |
System.out.println(assignment); | |
for (String country : result.get(assignment)) { | |
System.out.println(country); | |
} | |
System.out.println(); | |
} | |
} | |
private static void setValue(Instance instance, Attribute att, String value) { | |
int intValue = Integer.valueOf(value) / 10; | |
instance.setValue(att, intValue); | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
kMeans | |
====== | |
Number of iterations: 5 | |
Within cluster sum of squared errors: 4.9093889010504075 | |
Cluster centroids: | |
Cluster# | |
Attribute Full Data 0 1 2 3 | |
(66) (19) (17) (25) (5) | |
================================================================== | |
PDI 5.5909 3.7895 6.1765 7.16 2.6 | |
IDV 3.6364 6.2105 1.7059 2.36 6.8 | |
MAS 4.5758 5.8947 3.8235 4.84 0.8 | |
UAI 6.0303 6.4211 8.2353 4.68 3.8 | |
0 | |
France | |
Poland | |
Belgium | |
Czech Republic | |
Spain | |
Japan | |
Italy | |
Argentina | |
South Africa | |
Hungary | |
United States | |
Australia | |
Germany | |
United Kingdom | |
Switzerland | |
Ireland | |
New Zealand | |
Israel | |
Austria | |
1 | |
Guatemala | |
Panama | |
Mexico | |
Brazil | |
Colombia | |
El Salvador | |
Turkey | |
Peru | |
Thailand | |
Chile | |
Portugal | |
Uruguay | |
Greece | |
South Korea | |
Taiwan | |
Pakistan | |
Costa Rica | |
2 | |
Malaysia | |
Philippines | |
Venezuela | |
China | |
Egypt | |
Iraq | |
Kuwait | |
Lebanon | |
Libya | |
Saudi Arabia | |
United Arab Emirates | |
Ecuador | |
Indonesia | |
Ghana | |
India | |
Nigeria | |
Sierra Leone | |
Singapore | |
Hong Kong | |
Ethiopia | |
Kenya | |
Tanzania | |
Zambia | |
Iran | |
Jamaica | |
3 | |
Netherlands | |
Finland | |
Norway | |
Sweden | |
Denmark |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment