Skip to content

Instantly share code, notes, and snippets.

@ashenfad
Last active June 29, 2017 01:56
Show Gist options
  • Save ashenfad/f4198ffbdb5d0aabd1814767b2e76566 to your computer and use it in GitHub Desktop.
Save ashenfad/f4198ffbdb5d0aabd1814767b2e76566 to your computer and use it in GitHub Desktop.
t-SNE Layout of Titanic Clusters

A t-SNE inspired visualization of a BigML G-means clustering built from the Titanic dataset. Mouse over a cluster to see the centroid.

/*
  Adds a `union` method to every Set: returns a NEW Set holding the
  elements of the receiver followed by the elements of `setB` (any
  iterable). Neither input is modified.

  The method is installed with Object.defineProperty so that it is not
  enumerable; a plain assignment would make "union" show up in every
  `for...in` loop over a Set. Like the assignment it replaces, this
  overrides any `union` already present on Set.prototype.
*/
Object.defineProperty(Set.prototype, 'union', {
  value: function (setB) {
    var combined = new Set(this);
    for (var elem of setB) {
      combined.add(elem);
    }
    return combined;
  },
  writable: true,
  configurable: true,
  enumerable: false
});
/*
  Adds an `intersection` method to every Set: returns a NEW Set holding
  the elements of `setB` (any iterable) that the receiver also contains,
  in `setB`'s iteration order. Neither input is modified.

  The method is installed with Object.defineProperty so that it is not
  enumerable; a plain assignment would make "intersection" show up in
  every `for...in` loop over a Set. Like the assignment it replaces, this
  overrides any `intersection` already present on Set.prototype.
*/
Object.defineProperty(Set.prototype, 'intersection', {
  value: function (setB) {
    var shared = new Set();
    for (var elem of setB) {
      if (this.has(elem)) {
        shared.add(elem);
      }
    }
    return shared;
  },
  writable: true,
  configurable: true,
  enumerable: false
});
/*
  Distance between two term Sets, in the range [0, 1]:
    1 - |set1 ∩ set2| / sqrt(|set1| * |set2|)
  i.e. one minus the cosine similarity of the sets viewed as binary
  term vectors. Two empty sets are identical (0); an empty set versus a
  non-empty one is maximally distant (1).

  The overlap is counted directly with Set.prototype.has, so this
  function does not depend on any `intersection` method being patched
  onto Set.prototype.
*/
function termDist(set1, set2) {
  if (set1.size === 0 && set2.size === 0) {
    return 0;
  }
  if (set1.size === 0 || set2.size === 0) {
    return 1;
  }
  // Walk the smaller set and probe the larger one.
  const smaller = set1.size <= set2.size ? set1 : set2;
  const larger = smaller === set1 ? set2 : set1;
  let shared = 0;
  for (const term of smaller) {
    if (larger.has(term)) {
      shared += 1;
    }
  }
  return 1 - shared / Math.sqrt(set1.size * set2.size);
}
/*
  Given a BigML cluster resource, will return a function that computes
  the distance between two clusters (referenced by their index).

  Reads `resource.clusters.clusters[i].center` (field id -> centroid
  value) and `resource.scales` (field id -> weight).

  Side effect: every center value whose typeof is 'object' (the term
  lists of text fields) is replaced IN PLACE by a Set, so the resource's
  centers hold Sets after this call.

  The returned function (a, b) computes a scaled Euclidean distance over
  the fields of cluster a's center:
    - numbers:    plain difference
    - term Sets:  termDist
    - anything else (e.g. category strings): 0 when equal, 1 otherwise
  Each per-field difference is multiplied by scales[field] before being
  squared and summed.
*/
function clusterDistFn(resource) {
  const clusters = resource.clusters.clusters;
  const scales = resource.scales;

  // Convert term lists to Sets once, up front, so the distance function
  // never has to rebuild them.
  Object.keys(clusters).forEach(function (i) {
    const center = clusters[i].center;
    Object.keys(center).forEach(function (k) {
      if (typeof center[k] === 'object') {
        center[k] = new Set(center[k]);
      }
    });
  });

  return function (a, b) {
    const centerA = clusters[a].center;
    const centerB = clusters[b].center;
    let sum = 0;
    for (const k of Object.keys(centerA)) {
      const aVal = centerA[k];
      const bVal = centerB[k];
      // Declared per field so a value with an unexpected type can never
      // inherit the (already scaled) difference of the previous field.
      let diff;
      switch (typeof aVal) {
        case 'number':
          diff = aVal - bVal;
          break;
        case 'object':
          diff = termDist(aVal, bVal);
          break;
        default:
          diff = aVal === bVal ? 0 : 1;
          break;
      }
      diff *= scales[k];
      sum += diff * diff;
    }
    return Math.sqrt(sum);
  };
}
/*
  Builds the full pairwise distance matrix for the clusters of a BigML
  cluster resource. Entry [a][b] is the distance between cluster a and
  cluster b as computed by clusterDistFn(resource). The diagonal is 0
  and the lower triangle is copied from the upper one, so each pair is
  only measured once. The finished matrix is logged to the console and
  returned.
*/
function distMatrix (resource) {
var size = Object.keys(resource.clusters.clusters).length;
var measure = clusterDistFn(resource);
var matrix = new Array(size);
for (var row = 0; row < size; row++) {
var cells = new Array(size);
matrix[row] = cells;
var col;
// left of the diagonal: reuse what earlier rows already computed
for (col = 0; col < row; col++) {
cells[col] = matrix[col][row];
}
cells[row] = 0;
// right of the diagonal: compute fresh distances
for (col = row + 1; col < size; col++) {
cells[col] = measure(row, col);
}
}
console.log(matrix);
return matrix;
}
{"balance_fields": true, "category": 0, "cluster_datasets": {}, "cluster_models": {}, "cluster_seed": "2c249dda00fbf54ab4cdd850532a584f286af5b6", "clusters": {"between_ss": 183.74804, "clusters": [{"center": {"000000": ["mrs"], "000001": 51.30229, "000002": "1st Class", "000007": "Southampton", "00000b": "TRUE"}, "count": 202, "distance": {"bins": [[0.08287, 2], [0.10532, 3], [0.13437, 4], [0.16596, 2], [0.18927, 1], [0.20577, 3], [0.25079, 12], [0.26334, 11], [0.27496, 10], [0.28606, 6], [0.30158, 6], [0.31234, 10], [0.32849, 4], [0.33892, 4], [0.35098, 28], [0.36583, 18], [0.38191, 12], [0.39995, 5], [0.40883, 6], [0.42855, 19], [0.44235, 8], [0.45669, 7], [0.47225, 4], [0.4877, 2], [0.50643, 2], [0.5199, 3], [0.53289, 2], [0.54572, 1], [0.5621, 2], [0.57564, 1], [0.58677, 3], [0.63103, 1]], "exact_histogram": {"populations": [1, 1, 3, 3, 1, 2, 1, 3, 0, 13, 20, 8, 14, 6, 34, 18, 11, 8, 20, 10, 8, 2, 3, 4, 2, 2, 3, 0, 1], "start": 0.06, "width": 0.02}, "maximum": 0.63103, "mean": 0.35445, "median": 0.35454, "minimum": 0.07978, "population": 202, "standard_deviation": 0.1001, "sum": 71.59813, "sum_squares": 27.39176, "variance": 0.01002}, "id": "000000", "name": "Cluster 0"}, {"center": {"000000": ["mr", "william"], "000001": 40.21564, "000002": "Victualling", "000007": "Southampton", "00000b": "FALSE"}, "count": 404, "distance": {"bins": [[0.01615, 1], [0.02503, 1], [0.0502, 3], [0.06212, 2], [0.09652, 5], [0.12597, 6], [0.13918, 1], [0.15001, 16], [0.16224, 17], [0.17528, 16], [0.19275, 12], [0.20617, 7], [0.22108, 6], [0.23235, 6], [0.24798, 4], [0.25998, 18], [0.27431, 35], [0.28529, 42], [0.29397, 50], [0.30568, 29], [0.31682, 16], [0.3284, 8], [0.33767, 5], [0.35092, 33], [0.36828, 11], [0.37883, 24], [0.38904, 14], [0.40192, 2], [0.41398, 2], [0.42343, 7], [0.43476, 4], [0.48858, 1]], "exact_histogram": {"populations": [1, 1, 4, 1, 5, 0, 7, 21, 23, 17, 9, 10, 13, 46, 93, 40, 15, 30, 29, 23, 5, 10, 0, 0, 1], "start": 0, "width": 0.02}, "maximum": 0.48858, 
"mean": 0.28052, "median": 0.29093, "minimum": 0.01615, "population": 404, "standard_deviation": 0.08132, "sum": 113.32894, "sum_squares": 34.45572, "variance": 0.00661}, "id": "000001", "name": "Cluster 1"}, {"center": {"000000": ["elizabeth", "miss", "mrs"], "000001": 24.81794, "000002": "3rd Class", "000007": "Southampton", "00000b": "TRUE"}, "count": 251, "distance": {"bins": [[0.10631, 1], [0.11573, 4], [0.13497, 1], [0.14587, 1], [0.16542, 6], [0.17455, 6], [0.18358, 2], [0.20197, 3], [0.21636, 5], [0.23075, 3], [0.24592, 12], [0.2576, 11], [0.26541, 7], [0.27222, 3], [0.28558, 17], [0.29493, 19], [0.30339, 26], [0.31133, 11], [0.31835, 18], [0.32561, 8], [0.33248, 11], [0.33962, 10], [0.35009, 21], [0.36275, 4], [0.38027, 9], [0.38996, 8], [0.39981, 8], [0.40882, 5], [0.41897, 2], [0.42625, 7], [0.44289, 1], [0.45361, 1]], "exact_histogram": {"populations": [5, 1, 1, 12, 2, 7, 4, 23, 11, 39, 45, 31, 25, 8, 16, 12, 7, 2], "start": 0.1, "width": 0.02}, "maximum": 0.45361, "mean": 0.30503, "median": 0.30754, "minimum": 0.10631, "population": 251, "standard_deviation": 0.06783, "sum": 76.56191, "sum_squares": 24.50375, "variance": 0.0046}, "id": "000002", "name": "Cluster 2"}, {"center": {"000000": ["george", "master", "miss"], "000001": 5.67897, "000002": "3rd Class", "000007": "Southampton", "00000b": "TRUE"}, "count": 118, "distance": {"bins": [[0.14819, 2], [0.17352, 4], [0.17717, 3], [0.17995, 1], [0.18489, 1], [0.18954, 3], [0.19407, 1], [0.20106, 1], [0.20977, 1], [0.26764, 2], [0.2822, 2], [0.28833, 7], [0.2952, 8], [0.30108, 13], [0.30431, 10], [0.30862, 13], [0.3149, 13], [0.32195, 8], [0.32712, 4], [0.33063, 2], [0.33616, 1], [0.34317, 1], [0.35051, 2], [0.35653, 1], [0.36455, 1], [0.37635, 2], [0.38154, 1], [0.38507, 2], [0.38895, 2], [0.3944, 4], [0.3994, 1], [0.41588, 1]], "exact_histogram": {"populations": [2, 0, 0, 8, 4, 1, 2, 0, 0, 0, 0, 0, 2, 0, 9, 11, 32, 16, 10, 3, 2, 2, 1, 2, 4, 6, 0, 1], "start": 0.14, "width": 0.01}, "maximum": 0.41588, 
"mean": 0.29936, "median": 0.30616, "minimum": 0.14742, "population": 118, "standard_deviation": 0.05762, "sum": 35.32475, "sum_squares": 10.9634, "variance": 0.00332}, "id": "000003", "name": "Cluster 3"}, {"center": {"000000": ["mr", "william"], "000001": 31.01374, "000002": "Engine", "000007": "Southampton", "00000b": "TRUE"}, "count": 271, "distance": {"bins": [[0.02087, 1], [0.04954, 1], [0.07453, 1], [0.08255, 1], [0.09483, 1], [0.10898, 1], [0.12499, 6], [0.137, 2], [0.14701, 8], [0.16024, 6], [0.17332, 9], [0.19007, 5], [0.20489, 6], [0.21994, 8], [0.23651, 4], [0.25421, 23], [0.2744, 22], [0.28476, 45], [0.2934, 21], [0.30475, 13], [0.31833, 7], [0.32781, 3], [0.33937, 1], [0.35023, 9], [0.35956, 10], [0.37375, 24], [0.38475, 20], [0.39818, 7], [0.40979, 1], [0.4209, 1], [0.4354, 3], [0.44921, 1]], "exact_histogram": {"populations": [1, 1, 1, 2, 1, 8, 10, 13, 5, 10, 8, 23, 22, 66, 20, 4, 13, 30, 24, 4, 4, 1], "start": 0.02, "width": 0.02}, "maximum": 0.44921, "mean": 0.28486, "median": 0.28528, "minimum": 0.02087, "population": 271, "standard_deviation": 0.07952, "sum": 77.19601, "sum_squares": 23.69714, "variance": 0.00632}, "id": "000004", "name": "Cluster 4"}, {"center": {"000000": ["charles", "james", "mr"], "000001": 26.94829, "000002": "2nd Class", "000007": "Southampton", "00000b": "FALSE"}, "count": 155, "distance": {"bins": [[0.08444, 1], [0.09152, 5], [0.10411, 5], [0.11519, 1], [0.11997, 1], [0.14797, 5], [0.15872, 5], [0.16652, 9], [0.17628, 12], [0.1841, 9], [0.19256, 9], [0.20302, 6], [0.2162, 8], [0.22538, 2], [0.23313, 4], [0.23971, 1], [0.24678, 8], [0.2543, 9], [0.26021, 5], [0.26967, 12], [0.2796, 3], [0.28566, 7], [0.29137, 1], [0.29667, 4], [0.30286, 7], [0.31175, 3], [0.32447, 2], [0.33623, 3], [0.34862, 3], [0.35964, 3], [0.37587, 1], [0.38354, 1]], "exact_histogram": {"populations": [1, 5, 5, 2, 0, 0, 3, 5, 11, 12, 9, 9, 6, 8, 2, 5, 8, 11, 11, 5, 9, 5, 7, 3, 2, 3, 2, 3, 1, 1, 1], "start": 0.08, "width": 0.01}, "maximum": 0.38354, 
"mean": 0.22658, "median": 0.22538, "minimum": 0.08444, "population": 155, "standard_deviation": 0.06816, "sum": 35.11948, "sum_squares": 8.67268, "variance": 0.00465}, "id": "000005", "name": "Cluster 5"}, {"center": {"000000": ["mr"], "000001": 24.47097, "000002": "3rd Class", "000007": "Southampton", "00000b": "FALSE"}, "count": 517, "distance": {"bins": [[0.01029, 4], [0.03068, 6], [0.05147, 6], [0.0753, 23], [0.08884, 12], [0.10386, 24], [0.11574, 22], [0.12548, 24], [0.13665, 21], [0.15312, 18], [0.16826, 24], [0.17608, 1], [0.18469, 11], [0.20196, 9], [0.21792, 15], [0.22929, 3], [0.24326, 11], [0.2534, 39], [0.26154, 41], [0.26915, 55], [0.27851, 57], [0.28839, 8], [0.29823, 14], [0.30783, 7], [0.31735, 5], [0.32622, 3], [0.33695, 5], [0.35125, 15], [0.36154, 14], [0.37095, 9], [0.38164, 10], [0.40852, 1]], "exact_histogram": {"populations": [4, 6, 6, 23, 12, 46, 40, 23, 25, 13, 14, 13, 54, 132, 33, 16, 7, 19, 26, 4, 1], "start": 0, "width": 0.02}, "maximum": 0.40852, "mean": 0.22006, "median": 0.25537, "minimum": 0.0097, "population": 517, "standard_deviation": 0.0897, "sum": 113.76952, "sum_squares": 29.18769, "variance": 0.00805}, "id": "000006", "name": "Cluster 6"}, {"center": {"000000": ["john", "mr", "william"], "000001": 24.50927, "000002": "Victualling", "000007": "Southampton", "00000b": "FALSE"}, "count": 221, "distance": {"bins": [[0.04605, 1], [0.05463, 2], [0.06818, 1], [0.08733, 1], [0.09649, 2], [0.1057, 5], [0.12009, 5], [0.14446, 10], [0.15403, 8], [0.16365, 16], [0.17365, 30], [0.1864, 20], [0.19833, 11], [0.21, 11], [0.22294, 7], [0.2415, 2], [0.24962, 5], [0.2611, 9], [0.26887, 8], [0.27537, 1], [0.28241, 6], [0.29256, 14], [0.3009, 12], [0.30788, 8], [0.31538, 3], [0.32255, 6], [0.32889, 4], [0.34247, 1], [0.34997, 3], [0.35983, 5], [0.37346, 2], [0.38519, 2]], "exact_histogram": {"populations": [3, 1, 3, 7, 5, 16, 46, 30, 15, 5, 11, 14, 23, 19, 10, 7, 4, 2], "start": 0.04, "width": 0.02}, "maximum": 0.38629, "mean": 0.2226, "median": 
0.19877, "minimum": 0.04605, "population": 221, "standard_deviation": 0.07351, "sum": 49.195, "sum_squares": 12.1398, "variance": 0.0054}, "id": "000007", "name": "Cluster 7"}, {"center": {"000000": ["roberto", "sig"], "000001": 23.81894, "000002": "A la Carte", "000007": "Southampton", "00000b": "FALSE"}, "count": 60, "distance": {"bins": [[0.03745, 1], [0.07181, 2], [0.07367, 2], [0.0809, 2], [0.09225, 2], [0.10642, 2], [0.12322, 3], [0.12482, 1], [0.12803, 1], [0.1304, 1], [0.13884, 1], [0.14564, 4], [0.14783, 1], [0.14965, 2], [0.15608, 1], [0.15867, 2], [0.16458, 2], [0.17609, 2], [0.18305, 1], [0.18627, 2], [0.20188, 3], [0.24488, 2], [0.24894, 3], [0.25164, 1], [0.25717, 2], [0.25993, 6], [0.26419, 2], [0.26708, 1], [0.27259, 2], [0.28224, 1], [0.29302, 1], [0.29769, 1]], "exact_histogram": {"populations": [1, 0, 0, 0, 4, 2, 2, 2, 0, 5, 2, 7, 3, 2, 2, 3, 0, 3, 0, 0, 0, 5, 7, 5, 2, 1, 2], "start": 0.03, "width": 0.01}, "maximum": 0.29769, "mean": 0.18168, "median": 0.17022, "minimum": 0.03745, "population": 60, "standard_deviation": 0.07092, "sum": 10.90071, "sum_squares": 2.27719, "variance": 0.00503}, "id": "000008", "name": "Cluster 8"}], "fields": {"000000": {"column_number": 0, "datatype": "string", "name": "Name", "optype": "text", "order": 0, "preferred": true, "summary": {"average_length": 22.48415, "missing_count": 0, "tag_cloud": [["mr", 1589], ["miss", 270], ["mrs", 210], ["william", 175], ["john", 133], ["george", 92], ["charles", 90], ["thomas", 90], ["james", 88], ["henry", 84], ["edward", 68], ["master", 61], ["frederick", 54], ["joseph", 52], ["alfred", 49], ["arthur", 44], ["j", 44], ["sig", 43], ["mary", 42], ["w", 37], ["robert", 35], ["albert", 34], ["ernest", 33], ["richard", 32], ["frank", 31], ["elizabeth", 29], ["harry", 29], ["h", 28], ["walter", 25], ["samuel", 23], ["a", 22], ["francis", 21], ["johan", 21], ["alexander", 20], ["anna", 17], ["e", 16], ["herbert", 16], ["maria", 16], ["smith", 16], ["margaret", 15], ["patrick", 15], 
["annie", 14], ["leonard", 14], ["o", 14], ["peter", 14], ["catherine", 13], ["f", 13], ["karl", 13], ["marie", 13], ["martin", 13], ["taylor", 13], ["alice", 12], ["g", 12], ["bertram", 11], ["brown", 11], ["david", 11], ["davies", 11], ["ellen", 11], ["ford", 11], ["harris", 11], ["percy", 11], ["reginald", 11], ["sage", 11], ["sidney", 11], ["van", 11], ["andersson", 10], ["andrew", 10], ["bridget", 10], ["dr", 10], ["victor", 10], ["williams", 10], ["august", 9], ["edith", 9], ["elias", 9], ["helen", 9], ["hugh", 9], ["jane", 9], ["jr", 9], ["rice", 9], ["white", 9], ["anne", 8], ["asplund", 8], ["goodwin", 8], ["jean", 8], ["kate", 8], ["louise", 8], ["m", 8], ["moore", 8], ["philip", 8], ["allen", 7], ["carter", 7], ["daniel", 7], ["de", 7], ["edgar", 7], ["edwin", 7], ["elisabeth", 7], ["emily", 7], ["florence", 7], ["fr", 7], ["harold", 7], ["johannes", 7], ["johnson", 7], ["katherine", 7], ["kelly", 7], ["l", 7], ["may", 7], ["michael", 7], ["nellie", 7], ["stephen", 7], ["t", 7], ["ward", 7], ["y", 7], ["abraham", 6], ["ada", 6], ["agnes", 6], ["augustus", 6], ["benjamin", 6], ["bertha", 6], ["c", 6], ["carl", 6], ["delia", 6], ["douglas", 6], ["ernst", 6], ["fortune", 6], ["gustaf", 6], ["hansen", 6], ["hart", 6], ["ida", 6], ["ivan", 6], ["johansson", 6], ["jones", 6], ["lefebvre", 6], ["louis", 6], ["nils", 6], ["norman", 6], ["olsen", 6], ["panula", 6], ["paul", 6], ["phillips", 6], ["ryerson", 6], ["s", 6], ["skoog", 6], ["sofia", 6], ["tannous", 6], ["watson", 6], ["amelia", 5], ["anderson", 5], ["archibald", 5], ["arnold", 5], ["augusta", 5], ["b", 5], ["boulos", 5], ["chapman", 5], ["clifford", 5], ["constance", 5], ["dean", 5], ["edvin", 5], ["emil", 5], ["emilio", 5], ["emma", 5], ["eugene", 5], ["evans", 5], ["georges", 5], ["gilbert", 5], ["giuseppe", 5], ["gordon", 5], ["graham", 5], ["gustafsson", 5], ["hanna", 5], ["herman", 5], ["howard", 5], ["jensen", 5], ["johnston", 5], ["kink", 5], ["lewis", 5], ["lillian", 5], ["mabel", 5], ["nora", 
5], ["owen", 5], ["palsson", 5], ["percival", 5], ["pierre", 5], ["r", 5], ["scott", 5], ["stanley", 5], ["abbott", 4], ["adolf", 4], ["ali", 4], ["allison", 4], ["austin", 4], ["baclini", 4], ["bailey", 4], ["baptiste", 4], ["barbara", 4], ["baxter", 4], ["becker", 4], ["bernard", 4], ["bradley", 4], ["cacic", 4], ["carlo", 4], ["caroline", 4], ["charlotte", 4], ["colonel", 4], ["connor", 4], ["crosby", 4], ["d", 4], ["davis", 4], ["dennis", 4], ["donald", 4], ["dorothy", 4], ["edvard", 4], ["eliza", 4], ["elsie", 4], ["emest", 4], ["ethel", 4], ["foley", 4], ["fox", 4], ["frances", 4], ["gerios", 4], ["giles", 4], ["goldsmith", 4], ["hall", 4], ["hannah", 4], ["hans", 4], ["harper", 4], ["hocking", 4], ["jacob", 4], ["jakob", 4], ["jessie", 4], ["jose", 4], ["josef", 4], ["julia", 4], ["katie", 4], ["king", 4], ["laroche", 4], ["lee", 4], ["leo", 4], ["leslie", 4], ["lucy", 4], ["luigi", 4], ["maggie", 4], ["major", 4], ["marion", 4], ["martha", 4], ["matilda", 4], ["maurice", 4], ["mccarthy", 4], ["morgan", 4], ["oliver", 4], ["olof", 4], ["oscar", 4], ["richards", 4], ["roberts", 4], ["rogers", 4], ["rowe", 4], ["ruth", 4], ["saunders", 4], ["simmons", 4], ["spencer", 4], ["svensson", 4], ["thompson", 4], ["vanderplancke", 4], ["ware", 4], ["west", 4], ["wilhelm", 4], ["wright", 4], ["youssef", 4], ["amy", 3], ["andersen", 3], ["andre", 3], ["andrews", 3], ["ann", 3], ["anthony", 3], ["auguste", 3], ["ball", 3], ["barker", 3], ["barnes", 3], ["bessie", 3], ["betros", 3], ["billiard", 3], ["bishop", 3], ["blake", 3], ["bourke", 3], ["brien", 3], ["buckley", 3], ["burke", 3], ["butt", 3], ["caldwell", 3], ["campbell", 3], ["carlsson", 3], ["cecil", 3], ["christopher", 3], ["christy", 3], ["clarence", 3], ["clark", 3], ["clarke", 3], ["collyer", 3], ["compton", 3], ["cook", 3], ["cor", 3], ["coutts", 3], ["cunningham", 3], ["daher", 3], ["daly", 3], ["danbom", 3], ["dickson", 3], ["dodge", 3], ["drew", 3], ["edmond", 3], ["eleanor", 3], ["elin", 3], ["erik", 3], 
["eugenie", 3], ["fleming", 3], ["flynn", 3], ["francesco", 3], ["franklin", 3], ["frauenthal", 3], ["frolicher", 3], ["gertrude", 3], ["gill", 3], ["giovanni", 3], ["gladys", 3], ["hanora", 3], ["harder", 3], ["harrison", 3], ["hays", 3], ["hickman", 3], ["hill", 3], ["hoyt", 3], ["hubert", 3], ["hunt", 3], ["ii", 3], ["impe", 3], ["irene", 3], ["julian", 3], ["julius", 3], ["jussila", 3], ["karlsson", 3], ["keane", 3], ["kelly, mr james", 3], ["khalil", 3], ["klasen", 3], ["knight", 3], ["lane", 3], ["larsson", 3], ["laurence", 3], ["lawrence", 3], ["leopold", 3], ["light", 3], ["long", 3], ["louisa", 3], ["madeleine", 3], ["mallet", 3], ["mathias", 3], ["matthew", 3], ["mccoy", 3], ["meyer", 3], ["minahan", 3], ["morris", 3], ["moubarek", 3], ["murphy", 3], ["nakid", 3], ["navratil", 3], ["neal", 3], ["newell", 3], ["nichols", 3], ["nicola", 3], ["nilsson", 3], ["olive", 3], ["olsson", 3], ["oreskovic", 3], ["oskar", 3], ["p", 3], ["parsons", 3], ["peacock", 3], ["pearce", 3], ["quick", 3], ["ralph", 3], ["read", 3], ["rene", 3], ["robinson", 3], ["roger", 3], ["roland", 3], ["rosalie", 3], ["rosblom", 3], ["ross", 3], ["ryan", 3], ["samaan", 3], ["sandstrom", 3], ["sarah", 3], ["sigrid", 3], ["simon", 3], ["spedden", 3], ["stone", 3], ["taussig", 3], ["thayer", 3], ["thorne", 3], ["tom", 3], ["touma", 3], ["v", 3], ["veal", 3], ["viktor", 3], ["violet", 3], ["warren", 3], ["washington", 3], ["webber", 3], ["wells", 3], ["wick", 3], ["widener", 3], ["wilfred", 3], ["wood", 3], ["young", 3], ["aaron", 2], ["abelseth", 2], ["abelson", 2], ["achille", 2], ["adams", 2], ["adele", 2], ["ahmed", 2], ["akerman", 2], ["aks", 2], ["alexandra", 2], ["alfons", 2], ["alfrida", 2], ["allsop", 2], ["alma", 2], ["aloysius", 2], ["anders", 2], ["andreas", 2], ["angle", 2], ["anton", 2], ["antonine", 2], ["antonio", 2], ["antti", 2], ["archie", 2], ["arne", 2], ["arvid", 2], ["assad", 2], ["assaf", 2], ["astor", 2], ["backstrom", 2], ["barlow", 2], ["barrett", 2], ["barton", 2], 
["battista", 2], ["beane", 2], ["beattie", 2], ["beauchamp", 2], ["beckwith", 2], ["bengtsson", 2], ["bennett", 2], ["bessant", 2], ["bird", 2], ["black", 2], ["bonnell", 2], ["borie", 2], ["borland", 2], ["bowen", 2], ["bowerman", 2], ["braund", 2], ["bristow", 2], ["bryhl", 2], ["burns", 2], ["calic", 2], ["canavan", 2], ["captain", 2], ["caram", 2], ["cardeza", 2], ["carr", 2], ["carrau", 2], ["castellana", 2], ["cavendish", 2], ["chaffee", 2], ["chambers", 2], ["charlotta", 2], ["chitty", 2], ["chronopoulos", 2], ["clara", 2], ["clement", 2], ["clench", 2], ["cohen", 2], ["coleman", 2], ["collins", 2], ["connolly", 2], ["connolly, miss kate", 2], ["cooper", 2], ["cornelius", 2], ["couch", 2], ["cribb", 2], ["crispin", 2], ["cumings", 2], ["daniels", 2], ["davidson", 2], ["davison", 2], ["del", 2], ["denis", 2], ["der", 2], ["dick", 2], ["dodd", 2], ["doling", 2], ["doyle", 2], ["duff", 2], ["duran", 2], ["dyer", 2], ["dyker", 2], ["edmund", 2], ["edwina", 2], ["edwy", 2], ["eileen", 2], ["einar", 2], ["eino", 2], ["ellis", 2], ["emanuel", 2], ["emile", 2], ["emilia", 2], ["ennis", 2], ["eustace", 2], ["eva", 2], ["evelyn", 2], ["everett", 2], ["ewart", 2], ["fenton", 2], ["fitzpatrick", 2], ["fletcher", 2], ["franchi", 2], ["franz", 2], ["fraser", 2], ["fred", 2], ["frederic", 2], ["fredrik", 2], ["futrelle", 2], ["gabriel", 2], ["gale", 2], ["garrett", 2], ["gerald", 2], ["gerda", 2], ["gertrud", 2], ["gibson", 2], ["gifford", 2], ["godfrey", 2], ["goldenberg", 2], ["gosling", 2], ["govanni", 2], ["grace", 2], ["green", 2], ["greenfield", 2], ["gretchen", 2], ["hagland", 2], ["hakkarainen", 2], ["hamalainen", 2], ["harald", 2], ["harris, mr edward", 2], ["harvey", 2], ["hawkesworth", 2], ["head", 2], ["hedwig", 2], ["heilmann", 2], ["helena", 2], ["helene", 2], ["henriette", 2], ["henrik", 2], ["hilda", 2], ["hippach", 2], ["hirvonen", 2], ["hodges", 2], ["hogg", 2], ["hold", 2], ["holland", 2], ["holverson", 2], ["hopkins", 2], ["houssein", 2], ["howell", 2], 
["hudson", 2], ["hugo", 2], ["hulda", 2], ["humphrey", 2], ["humphreys", 2], ["hurst", 2], ["hutchinson", 2], ["ilmakangas", 2], ["ingeborg", 2], ["isaac", 2], ["isidor", 2], ["jabbur", 2], ["jackson", 2], ["jacobsohn", 2], ["jacques", 2], ["jago", 2], ["jarvis", 2], ["jeffery", 2], ["jefferys", 2], ["jennie", 2], ["jenny", 2], ["johanna", 2], ["jonsson", 2], ["josefina", 2], ["juha", 2], ["juho", 2], ["jules", 2], ["k", 2], ["kantor", 2], ["karun", 2], ["kearl", 2], ["keefe", 2], ["kenyon", 2], ["kieran", 2], ["kiernan", 2], ["kimball", 2], ["konrad", 2], ["kristina", 2], ["lahtinen", 2], ["lalio", 2], ["lam", 2], ["laura", 2], ["lauritz", 2], ["lazar", 2], ["leader", 2], ["lennon", 2], ["leon", 2], ["lester", 2], ["lily", 2], ["lindell", 2], ["lindqvist", 2], ["lines", 2], ["ling", 2], ["lionel", 2], ["lloyd", 2], ["lobb", 2], ["louch", 2], ["lovell", 2], ["lucas", 2], ["lucile", 2], ["luise", 2], ["luka", 2], ["mackay", 2], ["mackie", 2], ["mae", 2], ["malkolm", 2], ["manuel", 2], ["marcel", 2], ["marian", 2], ["marija", 2], ["marjorie", 2], ["mark", 2], ["marsh", 2], ["marvin", 2], ["mason", 2], ["mathilde", 2], ["matti", 2], ["mauritz", 2], ["mayo", 2], ["mcgough", 2], ["mcgowan", 2], ["mcmicken", 2], ["mcnamee", 2], ["mellinger", 2], ["messemaeker", 2], ["michel", 2], ["middleton", 2], ["millar", 2], ["minko", 2], ["miriam", 2], ["mitchell", 2], ["mlle", 2], ["mohamed", 2], ["montague", 2], ["moor", 2], ["moran", 2], ["more", 2], ["morley", 2], ["moss", 2], ["ms", 2], ["mullen", 2], ["murdoch", 2], ["nasser", 2], ["natalia", 2], ["nicholls", 2], ["nikolai", 2], ["noel", 2], ["norris", 2], ["noss", 2], ["olaf", 2], ["ostby", 2], ["painter", 2], ["parker", 2], ["parrish", 2], ["pauline", 2], ["pears", 2], ["peder", 2], ["penasco", 2], ["peracchio", 2], ["perry", 2], ["peters", 2], ["petroff", 2], ["pettersson", 2], ["phillippe", 2], ["phyllis", 2], ["pokrnic", 2], ["preston", 2], ["price", 2], ["pugh", 2], ["pusey", 2], ["rebecca", 2], ["reed", 2], ["reeves", 
2], ["renouf", 2], ["risien", 2], ["roberto", 2], ["robins", 2], ["romaine", 2], ["rosa", 2], ["rothschild", 2], ["rudolf", 2], ["saad", 2], ["sara", 2], ["sarkis", 2], ["sawyer", 2], ["sebastiano", 2], ["self", 2], ["seward", 2], ["shea", 2], ["sigvard", 2], ["silvey", 2], ["slight", 2], ["sloan", 2], ["smyth", 2], ["snyder", 2], ["solomon", 2], ["sophia", 2], ["stehli", 2], ["stengel", 2], ["stewart", 2], ["straus", 2], ["strom", 2], ["stroud", 2], ["sullivan", 2], ["susan", 2], ["sydney", 2], ["terrell", 2], ["thamine", 2], ["theodor", 2], ["theodore", 2], ["thorneycroft", 2], ["thornton", 2], ["timothy", 2], ["tucker", 2], ["turpin", 2], ["vear", 2], ["vera", 2], ["vilhelm", 2], ["vincent", 2], ["virginia", 2], ["vivian", 2], ["walton", 2], ["watt", 2], ["webb", 2], ["weisz", 2], ["wiklund", 2], ["wilson", 2], ["winifred", 2], ["yarred", 2], ["yousseff", 2], ["zabour", 2], ["zakarian", 2]], "term_forms": {}}, "term_analysis": {"case_sensitive": false, "enabled": true, "language": "none", "token_mode": "all"}}, "000001": {"column_number": 1, "datatype": "double", "name": "Age", "optype": "numeric", "order": 1, "preferred": true, "summary": {"bins": [[0.57619, 21], [2.40909, 22], [4.31579, 19], [6.69231, 13], [8.52632, 19], [10.5, 12], [12.5, 8], [14.52381, 21], [16.57576, 66], [18.51261, 119], [20.50311, 161], [22.42529, 174], [24.46067, 178], [26.49673, 153], [28.44643, 168], [30.41765, 170], [32.40288, 139], [35.39565, 230], [39.25434, 173], [42.32836, 67], [44.53448, 58], [46.45946, 37], [48.41026, 39], [50.38462, 26], [52.25, 16], [54.45, 20], [56.54545, 11], [59.46429, 28], [63.09524, 21], [66.25, 4], [70.4, 5], [74, 1]], "maximum": 74, "mean": 29.91592, "median": 28.77918, "minimum": 0, "missing_count": 9, "population": 2199, "splits": [6.72305, 15.35521, 17.55471, 18.75882, 19.78967, 20.65126, 21.47403, 22.19231, 23.00633, 23.83205, 24.57523, 25.40281, 26.28421, 27.18135, 27.98149, 28.77918, 29.62674, 30.35564, 31.23689, 32.10886, 33.14472, 34.37121, 
35.41958, 36.47909, 38.02014, 39.30014, 41.07406, 43.09555, 45.62986, 49.16016, 55.77627], "standard_deviation": 11.78309, "sum": 65785.1, "sum_squares": 2273194.61, "variance": 138.84125}}, "000002": {"column_number": 2, "datatype": "string", "name": "Class/Dept", "optype": "categorical", "order": 2, "preferred": true, "summary": {"categories": [["3rd Class", 708], ["Victualling", 431], ["Engine", 325], ["1st Class", 324], ["2nd Class", 285], ["A la Carte", 69], ["Deck", 66]], "missing_count": 0}, "term_analysis": {"case_sensitive": false, "enabled": true, "language": null, "stem_words": false, "token_mode": "all", "use_stopwords": true}}, "000007": {"column_number": 7, "datatype": "string", "name": "Joined", "optype": "categorical", "order": 3, "preferred": true, "summary": {"categories": [["Southampton", 1613], ["Cherbourg", 274], ["Belfast", 198], ["Queenstown", 120]], "missing_count": 3}, "term_analysis": {"case_sensitive": false, "enabled": true, "language": null, "stem_words": false, "token_mode": "all", "use_stopwords": true}}, "00000b": {"column_number": 11, "datatype": "string", "name": "Survived", "optype": "categorical", "order": 4, "preferred": true, "summary": {"categories": [["FALSE", 1496], ["TRUE", 712]], "missing_count": 0}, "term_analysis": {"case_sensitive": false, "enabled": true, "language": null, "stem_words": false, "token_mode": "all", "use_stopwords": true}}}, "global": {"center": {"000000": ["miss", "mr", "mrs"], "000001": 29.91592, "000002": "3rd Class", "000007": "Southampton", "00000b": "FALSE"}, "distance": {"bins": [[0.11503, 13], [0.1577, 53], [0.18028, 54], [0.19923, 38], [0.21723, 19], [0.2398, 50], [0.26977, 53], [0.29752, 394], [0.32131, 172], [0.34315, 123], [0.36095, 87], [0.38475, 304], [0.40403, 173], [0.42334, 106], [0.44199, 73], [0.46341, 139], [0.49166, 54], [0.51305, 40], [0.54141, 50], [0.57073, 30], [0.59624, 34], [0.62959, 35], [0.66358, 24], [0.69063, 23], [0.71825, 12], [0.74222, 20], [0.76861, 12], [0.79585, 5], 
[0.81888, 3], [0.83818, 1], [0.89624, 2], [0.92342, 3]], "exact_histogram": {"populations": [26, 109, 83, 282, 447, 451, 314, 186, 84, 66, 47, 45, 32, 17, 5, 2, 3], "start": 0.1, "width": 0.05}, "maximum": 0.92839, "mean": 0.3829, "median": 0.37584, "minimum": 0.1035, "population": 2199, "standard_deviation": 0.12554, "sum": 841.99311, "sum_squares": 357.03717, "variance": 0.01576}}, "ratio_ss": 0.51465, "total_ss": 357.03717, "within_ss": 173.28913}, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2017-06-28T16:51:44.935000", "credits": 0, "credits_per_prediction": 0.0, "critical_value": 5, "dataset": "dataset/5430f6b40298d4075f000133", "dataset_field_types": {"categorical": 7, "datetime": 0, "numeric": 4, "preferred": 10, "text": 1, "total": 12}, "dataset_status": true, "dataset_type": 0, "description": "", "excluded_fields": [], "field_scales": {}, "fields_meta": {"count": 5, "limit": 1000, "offset": 0, "query_total": 5, "total": 5}, "input_fields": ["000000", "000001", "000002", "000007", "00000b"], "k": 9, "locale": "en-us", "max_columns": 12, "max_rows": 2208, "model_clusters": false, "name": "Titanic Survival", "number_of_batchcentroids": 0, "number_of_centroids": 0, "number_of_public_centroids": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": null, "range": [1, 2208], "replacement": false, "resource": "cluster/5953dea049c4a1364f003235", "rows": 2208, "sample_rate": 1.0, "scales": {"000000": 0.24485331028918525, "000001": 0.02058675884325904, "000002": 0.24485331028918525, "000007": 0.24485331028918525, "00000b": 0.24485331028918525}, "seed": "c71814f0fb38391a53976be721e8c5e2", "shared": false, "size": 192362, "source": "source/5430f4cf0298d4074f0000ef", "source_status": true, "status": {"code": 5, "elapsed": 14030, "message": "The cluster has been created", "progress": 1.0}, "subscription": true, "summary_fields": [], "tags": [], "updated": "2017-06-28T16:52:02.690000", "white_box": false}
<!DOCTYPE html>
<html>
<head>
<!-- Metadata and styles belong inside <head>; the document also needs a <title>. -->
<meta charset="utf-8">
<title>t-SNE Layout of Titanic Clusters</title>
<style>
#words {
position: absolute;
top: 50px;
left: 700px;
display: flex;
flex-direction: column;
}
</style>
<!-- Explicit https: protocol-relative URLs fail when the page is opened from file:// -->
<script src="https://d3js.org/d3.v3.min.js" charset="utf-8"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/seedrandom/2.4.0/seedrandom.min.js"></script>
<script src="cluster-dist.js" charset="utf-8"></script>
<script src="tsne.js" charset="utf-8"></script>
</head>
<body>
<svg width="850" height="500"></svg>
<div id="words"></div>
<script>
// Size of the drawing area, in pixels.
var width = 850;
var height = 500;
var colors = d3.scale.category20();

// Load the cluster resource, lay the clusters out with t-SNE and draw one
// circle per cluster. Hovering a circle lists that cluster's centroid in
// the #words panel.
d3.json("clustered-titanic.json", function(error, resource) {
  // Without the resource nothing can be drawn; report the failure
  // instead of crashing on the first property access below.
  if (error || !resource) {
    console.error("Unable to load clustered-titanic.json", error);
    return;
  }

  var fields = resource.clusters.fields;
  var coords = tsneLayout(resource, "defaultseed");
  var clusters = resource.clusters.clusters;
  var clusterCount = Object.keys(clusters).length;
  // The data bound to the circles is simply each cluster's index.
  var range = d3.range(clusterCount);

  // scale the tsne coordinates relative to the svg size
  coords = scaleDimension(width, width/10, 0, coords);
  coords = scaleDimension(height, height/10, 1, coords);

  var words = d3.select("#words");

  // Color derived from the cluster's position: x and y are mapped onto
  // the a and b axes of Lab space (-100..100) at the given lightness.
  var colorFn = function (i, lightness) {
    return d3.lab(lightness,
                  (200 * coords[i][0] / width) - 100,
                  (200 * coords[i][1] / height) - 100);
  };

  var svg = d3.select("svg");
  svg.style("width", width);
  svg.style("height", height);

  svg.selectAll(".dot").data(range).enter()
    .append("circle")
    .attr("class", "dot")
    .attr("r", function (i) {
      // circle area is proportional to the share of rows in the cluster
      var area = clusters[i].count / resource.rows;
      return Math.sqrt(area) * 60;
    })
    .on("mouseenter", function (i) {
      words.selectAll(".word").remove();
      var cluster = clusters[i];
      for (var k in cluster.center) {
        var name = fields[k].name;
        var val;
        if (typeof cluster.center[k] == 'object') {
          // term lists (clusterDistFn turns them into Sets);
          // Array.from accepts either form
          name += "(tokens)";
          val = Array.from(cluster.center[k]);
        } else {
          val = cluster.center[k];
        }
        name += " : ";
        words.append("text")
          .attr("class", "word")
          .text(name + val);
      }
      words.append("text").attr("class", "word")
        .text("Cluster Size : " + cluster.count);
    })
    .on("mouseleave", function (i) {
      words.selectAll(".word").remove();
    })
    .attr("cx", function (i) {return coords[i][0];})
    .attr("cy", function (i) {return coords[i][1];})
    .style("fill", function (i) { return colorFn(i, 80); })
    .style("stroke", function (i) { return colorFn(i, 65); });
});
/*
  Rescales one coordinate of every point, IN PLACE, so that the values
  of that coordinate span [buffer, dimSize - buffer].

    dimSize  - total extent available along this dimension
    buffer   - margin kept free at both ends
    dimIndex - which coordinate of each point to rescale (0 = x, 1 = y)
    coords   - array of points (each an array of numbers)

  Returns the same `coords` array. An empty array is returned untouched.
  When every point has the same value in this dimension there is no
  spread to stretch, so the points are centered (dimSize / 2) instead of
  dividing by zero.
*/
function scaleDimension(dimSize, buffer, dimIndex, coords) {
  if (coords.length === 0) {
    return coords;
  }
  var minDim = Infinity;
  var maxDim = -Infinity;
  coords.forEach(function (point) {
    minDim = Math.min(minDim, point[dimIndex]);
    maxDim = Math.max(maxDim, point[dimIndex]);
  });
  var diff = maxDim - minDim;
  var dimScale = dimSize - 2 * buffer;
  coords.forEach(function (point) {
    point[dimIndex] = diff === 0
      ? dimSize / 2
      : (((point[dimIndex] - minDim) / diff) * dimScale) + buffer;
  });
  return coords;
}
/*
  Computes a t-SNE layout for the clusters of a BigML cluster resource
  and returns whatever tsne.getSolution() yields after 300 steps.

  Math.seedrandom(seed) is called first so that a given seed gives a
  repeatable layout. The perplexity is a tenth of the cluster count
  (rounded), but never less than 2.
*/
function tsneLayout(resource, seed) {
Math.seedrandom(seed);
var perplexity = Math.max(Math.round(resource.clusters.clusters.length / 10), 2);
var tsne = new tsnejs.tSNE({epsilon: 10, perplexity: perplexity});
tsne.initDataDist(distMatrix(resource));
var remaining = 300;
while (remaining > 0) {
tsne.step();
remaining -= 1;
}
return tsne.getSolution();
}
</script>
</body>
</html>
// from https://github.com/karpathy/tsnejs
// create main global object
var tsnejs = tsnejs || { REVISION: 'ALPHA' };
(function(global) {
"use strict";
// utility function: does nothing when `condition` is truthy, otherwise
// throws `message` (or the string "Assertion failed" when no message is given)
var assert = function(condition, message) {
if(condition) { return; }
throw message || "Assertion failed";
}
// syntax sugar: returns opt[field] when `opt` has `field` as an OWN
// property (even if its value is undefined), otherwise `defaultval`.
// The check goes through Object.prototype so it also works for option
// objects without a prototype or with a shadowed hasOwnProperty, and a
// null/undefined `opt` simply yields the default.
var getopt = function(opt, field, defaultval) {
if(opt != null && Object.prototype.hasOwnProperty.call(opt, field)) {
return opt[field];
}
return defaultval;
}
// return a random number with 0 mean and unit standard deviation.
// Each accepted pair of uniform draws yields two such numbers: one is returned
// right away, the other is kept in v_val and handed out by the next call.
var return_v = false;
var v_val = 0.0;
var gaussRandom = function() {
  if(return_v) {
    return_v = false;
    return v_val;
  }
  var u, v, r;
  // draw points in the square [-1,1) x [-1,1) until one falls inside the
  // unit circle and is not the origin
  do {
    u = 2*Math.random()-1;
    v = 2*Math.random()-1;
    r = u*u + v*v;
  } while(r == 0 || r > 1);
  var c = Math.sqrt(-2*Math.log(r)/r);
  v_val = v*c; // second sample, saved for the following call
  return_v = true;
  return u*c;
};
// return a random normal number with mean `mu` and standard deviation `std`
var randn = function(mu, std) {
  var unitSample = gaussRandom();
  return mu + unitSample*std;
};
// utility that creates a vector of n zeros.
// Returns an empty plain array when n is undefined or NaN, a Float64Array
// when typed arrays are available, and a zero-filled plain array otherwise.
var zeros = function(n) {
  var unusable = typeof(n)==='undefined' || isNaN(n);
  if(unusable) { return []; }
  if(typeof ArrayBuffer !== 'undefined') {
    return new Float64Array(n); // typed arrays are faster
  }
  // lacking browser support
  var plain = new Array(n);
  var idx = 0;
  while(idx < n) {
    plain[idx] = 0;
    idx++;
  }
  return plain;
};
// utility that returns an n-by-d array of arrays; every cell holds the
// value s when s is provided (anything but undefined), otherwise a fresh
// randn(0.0, 1e-4) sample
var randn2d = function(n,d,s) {
  var fillConstant = typeof s !== 'undefined';
  var rows = [];
  for(var r=0;r<n;r++) {
    var row = [];
    for(var c=0;c<d;c++) {
      row.push(fillConstant ? s : randn(0.0, 1e-4));
    }
    rows.push(row);
  }
  return rows;
};
// compute the squared L2 distance between two vectors (the sum of squared
// per-component differences; no square root is taken), over x1.length components
var L2 = function(x1, x2) {
  var total = 0;
  for(var k=0, len=x1.length; k<len; k++) {
    var delta = x1[k]-x2[k];
    total += delta*delta;
  }
  return total;
};
// compute the pairwise L2() value for all vectors in X.
// The result is an N*N vector from zeros(), laid out row by row; each pair
// is computed once and written to both mirrored positions, and the
// diagonal entries are never written.
var xtod = function(X) {
  var count = X.length;
  var flat = zeros(count * count); // allocate contiguous array
  for(var row=0;row<count;row++) {
    var rowOffset = row*count;
    for(var col=row+1;col<count;col++) {
      var pairDist = L2(X[row], X[col]);
      flat[rowOffset+col] = pairDist;
      flat[col*count+row] = pairDist;
    }
  }
  return flat;
};
// compute (p_{i|j} + p_{j|i})/(2n)
// D is a flattened N x N matrix of distances; the result is a flattened
// N x N matrix as well.
// For every row i a precision `beta` is searched (doubling/halving until
// bracketed, then bisecting) so that the entropy of the row's normalised
// exp(-D*beta) kernel lands within `tol` of log(perplexity). The search
// stops after 50 tries at the latest and keeps the last row it computed.
// Every output entry is floored at 1e-100.
var d2p = function(D, perplexity, tol) {
  var side = Math.sqrt(D.length); // this better be an integer
  var N = Math.floor(side);
  assert(N === side, "D should have square number of elements.");
  var targetEntropy = Math.log(perplexity);
  var P = zeros(N * N); // per-row conditional probabilities
  var row = zeros(N); // scratch row, reused for every i
  var maxTries = 50;
  for(var i=0;i<N;i++) {
    var lo = -Infinity; // largest beta known to be too small
    var hi = Infinity; // smallest beta known to be too large
    var beta = 1; // initial value of precision
    var tries = 0;
    var finished = false;
    do {
      // kernel row for the current beta; the diagonal is forced to 0
      var total = 0.0;
      for(var j=0;j<N;j++) {
        var kernel = (i===j) ? 0 : Math.exp(- D[i*N+j] * beta);
        row[j] = kernel;
        total += kernel;
      }
      // normalise the row and measure its entropy
      var entropy = 0.0;
      for(var m=0;m<N;m++) {
        var prob = row[m] / total;
        row[m] = prob;
        if(prob > 1e-7) { entropy -= prob * Math.log(prob); }
      }
      if(entropy > targetEntropy) {
        // too diffuse: raise the precision for a peakier distribution
        lo = beta;
        beta = (hi === Infinity) ? beta * 2 : (beta + hi) / 2;
      } else {
        // too peaky (or exactly on target): lower the precision
        hi = beta;
        beta = (lo === -Infinity) ? beta / 2 : (beta + lo) / 2;
      }
      // stop on a good enough precision or when out of tries
      tries++;
      finished = Math.abs(entropy - targetEntropy) < tol || tries >= maxTries;
    } while(!finished);
    // keep the final row as row i of P
    for(var c=0;c<N;c++) { P[i*N+c] = row[c]; }
  }
  // symmetrize P and normalize it to sum to 1 over all ij
  var result = zeros(N * N);
  var denom = N*2;
  for(var a=0;a<N;a++) {
    for(var b=0;b<N;b++) {
      result[a*N+b] = Math.max((P[a*N+b] + P[b*N+a])/denom, 1e-100);
    }
  }
  return result;
};
// helper function: 1 for x > 0, -1 for x < 0, and 0 for everything else
// (including NaN)
function sign(x) {
  if (x > 0) { return 1; }
  if (x < 0) { return -1; }
  return 0;
}
// t-SNE optimizer.
// opt (optional) may carry:
//   perplexity - effective number of nearest neighbors (default 30)
//   dim        - dimensionality of the embedding (default 2)
//   epsilon    - learning rate (default 10)
// Load data with initDataRaw() or initDataDist(), call step() repeatedly,
// and read the embedding with getSolution().
var tSNE = function(opt) {
  opt = opt || {};
  this.perplexity = getopt(opt, "perplexity", 30); // effective number of nearest neighbors
  this.dim = getopt(opt, "dim", 2); // by default 2-D tSNE
  this.epsilon = getopt(opt, "epsilon", 10); // learning rate
  this.iter = 0;
};

tSNE.prototype = {

  // this function takes a set of high-dimensional points (a list of
  // equally long lists of numbers) and creates matrix P from them
  // using gaussian kernel.
  // Fails the " X is empty?" assertion when X has no points, and the
  // " X[0] is empty?" assertion when the first point has no components.
  initDataRaw: function(X) {
    var N = X.length;
    // check for emptiness BEFORE touching X[0]; reading X[0].length on an
    // empty X would raise a TypeError instead of the intended assertion
    assert(N > 0, " X is empty? You must have some data!");
    var D = X[0].length;
    assert(D > 0, " X[0] is empty? Where is the data?");
    var dists = xtod(X); // convert X to pairwise distances
    this.P = d2p(dists, this.perplexity, 1e-4); // attach to object
    this.N = N; // back up the size of the dataset
    this.initSolution(); // refresh this
  },

  // this function takes a given distance matrix and creates
  // matrix P from them.
  // D is assumed to be provided as a list of lists, and should be symmetric;
  // only the entries above the diagonal (D[i][j] with j > i) are read and
  // mirrored, the diagonal is left at 0.
  initDataDist: function(D) {
    var N = D.length;
    assert(N > 0, " X is empty? You must have some data!");
    // convert D to a (fast) typed array version
    var dists = zeros(N * N); // allocate contiguous array
    for(var i=0;i<N;i++) {
      for(var j=i+1;j<N;j++) {
        var d = D[i][j];
        dists[i*N+j] = d;
        dists[j*N+i] = d;
      }
    }
    this.P = d2p(dists, this.perplexity, 1e-4);
    this.N = N;
    this.initSolution(); // refresh this
  },

  // (re)initializes the solution to random and resets the iteration counter
  initSolution: function() {
    // generate random solution to t-SNE
    this.Y = randn2d(this.N, this.dim); // the solution
    this.gains = randn2d(this.N, this.dim, 1.0); // step gains to accelerate progress in unchanging directions
    this.ystep = randn2d(this.N, this.dim, 0.0); // momentum accumulator
    this.iter = 0;
  },

  // return pointer to current solution (the live array, not a copy)
  getSolution: function() {
    return this.Y;
  },

  // perform a single step of optimization to improve the embedding;
  // returns the cost evaluated before the step was applied
  step: function() {
    this.iter += 1;
    var N = this.N;

    var cg = this.costGrad(this.Y); // evaluate gradient
    var cost = cg.cost;
    var grad = cg.grad;

    // perform gradient step
    var ymean = zeros(this.dim);
    for(var i=0;i<N;i++) {
      for(var d=0;d<this.dim;d++) {
        var gid = grad[i][d];
        var sid = this.ystep[i][d];
        var gainid = this.gains[i][d];

        // compute gain update: shrink the gain when gradient and previous
        // step have the same sign, grow it otherwise
        var newgain = sign(gid) === sign(sid) ? gainid * 0.8 : gainid + 0.2;
        if(newgain < 0.01) newgain = 0.01; // clamp
        this.gains[i][d] = newgain; // store for next turn

        // compute momentum step direction (momentum rises from 0.5 to 0.8
        // at iteration 250)
        var momval = this.iter < 250 ? 0.5 : 0.8;
        var newsid = momval * sid - this.epsilon * newgain * grad[i][d];
        this.ystep[i][d] = newsid; // remember the step we took

        // step!
        this.Y[i][d] += newsid;

        ymean[d] += this.Y[i][d]; // accumulate mean so that we can center later
      }
    }

    // reproject Y to be zero mean
    for(var i=0;i<N;i++) {
      for(var d=0;d<this.dim;d++) {
        this.Y[i][d] -= ymean[d]/N;
      }
    }

    //if(this.iter%100===0) console.log('iter ' + this.iter + ', cost: ' + cost);
    return cost; // return current cost
  },

  // for debugging: gradient check. Logs, for every coordinate of Y, the
  // analytic gradient next to a central-difference estimate of it.
  debugGrad: function() {
    var N = this.N;

    var cg = this.costGrad(this.Y); // evaluate gradient
    var cost = cg.cost;
    var grad = cg.grad;

    var e = 1e-5;
    for(var i=0;i<N;i++) {
      for(var d=0;d<this.dim;d++) {
        var yold = this.Y[i][d];

        this.Y[i][d] = yold + e;
        var cg0 = this.costGrad(this.Y);

        this.Y[i][d] = yold - e;
        var cg1 = this.costGrad(this.Y);

        var analytic = grad[i][d];
        var numerical = (cg0.cost - cg1.cost) / ( 2 * e );
        console.log(i + ',' + d + ': gradcheck analytic: ' + analytic + ' vs. numerical: ' + numerical);

        this.Y[i][d] = yold; // restore the coordinate
      }
    }
  },

  // return cost and gradient, given an arrangement Y (a list of N points
  // with this.dim coordinates each), as {cost: number, grad: list of lists}
  costGrad: function(Y) {
    var N = this.N;
    var dim = this.dim; // dim of output space
    var P = this.P;

    // P is multiplied by 4 in the gradient during the first 100 iterations
    var pmul = this.iter < 100 ? 4 : 1; // trick that helps with local optima

    // compute current Q distribution, unnormalized first
    var Qu = zeros(N * N);
    var qsum = 0.0;
    for(var i=0;i<N;i++) {
      for(var j=i+1;j<N;j++) {
        var dsum = 0.0;
        for(var d=0;d<dim;d++) {
          var dhere = Y[i][d] - Y[j][d];
          dsum += dhere * dhere;
        }
        var qu = 1.0 / (1.0 + dsum); // Student t-distribution
        Qu[i*N+j] = qu;
        Qu[j*N+i] = qu;
        qsum += 2 * qu;
      }
    }
    // normalize Q distribution to sum to 1 (entries floored at 1e-100 so
    // the logarithm below stays finite)
    var NN = N*N;
    var Q = zeros(NN);
    for(var q=0;q<NN;q++) { Q[q] = Math.max(Qu[q] / qsum, 1e-100); }

    var cost = 0.0;
    var grad = [];
    for(var i=0;i<N;i++) {
      var gsum = new Array(dim); // init grad for point i
      for(var d=0;d<dim;d++) { gsum[d] = 0.0; }
      for(var j=0;j<N;j++) {
        cost += - P[i*N+j] * Math.log(Q[i*N+j]); // accumulate cost (the non-constant portion at least...)
        var premult = 4 * (pmul * P[i*N+j] - Q[i*N+j]) * Qu[i*N+j];
        for(var d=0;d<dim;d++) {
          gsum[d] += premult * (Y[i][d] - Y[j][d]);
        }
      }
      grad.push(gsum);
    }

    return {cost: cost, grad: grad};
  }
};
global.tSNE = tSNE; // export tSNE class
})(tsnejs);
// export the library: through module.exports when a `module` object with an
// `exports` property exists (nodejs), otherwise by attaching it to window
(function(lib) {
  "use strict";
  var hasModuleExports = typeof module !== "undefined" && typeof module.exports !== "undefined";
  if (hasModuleExports) {
    module.exports = lib; // in nodejs
  } else {
    window.tsnejs = lib; // in ordinary browser attach library to window
  }
})(tsnejs);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment