Created
August 3, 2018 21:46
-
-
Save peterk87/b203f62a71d7f4fb273139b219af5e81 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Generating Newick string output from hierarchical clustering of some cgMLST profiles\n", | |
"\n", | |
"Assuming you have an array of cgMLST profiles in, for example, a Pandas DataFrame read from a CSV or tab-delimited file, you can perform hierarchical clustering and output a Newick string.\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"*Enabling in-line figures*" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Populating the interactive namespace from numpy and matplotlib\n" | |
] | |
} | |
], | |
"source": [ | |
"%pylab inline" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Imports" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/home/CSCScience.ca/pkruczkiewicz/2018-08-03-python-newick-mst/venv/lib/python3.6/importlib/_bootstrap.py:205: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88\n", | |
" return f(*args, **kwds)\n" | |
] | |
} | |
], | |
"source": [ | |
"from typing import Dict, Tuple, List, Union, Optional\n", | |
"\n", | |
"import numpy as np\n", | |
"import scipy as sp\n", | |
"import pandas as pd\n", | |
"\n", | |
"from fastcluster import linkage\n", | |
"from scipy.sparse.csgraph import minimum_spanning_tree\n", | |
"from scipy.cluster.hierarchy import to_tree, ClusterNode, dendrogram\n", | |
"from scipy.spatial.distance import squareform" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Setting up some random cgMLST profile data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Seed NumPy's global RNG so the random profiles generated below are reproducible.\n", | |
"# (np.random.seed must be *called*; assigning to it would replace the function.)\n", | |
"np.random.seed(42)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from string import ascii_lowercase" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"allele_number_min = 1\n", | |
"allele_number_max = 4\n", | |
"n_markers = 10" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Our profiles are random integers drawn from a specified range (`allele_number_min` to `allele_number_max`, inclusive)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"cgmlst_profiles = np.random.randint(allele_number_min, high=allele_number_max + 1, size=len(ascii_lowercase) * n_markers)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([3, 1, 2, 3, 2, 4, 1, 3, 4, 1, 4, 3, 4, 2, 3, 3, 2, 1, 2, 3, 2, 1,\n", | |
" 2, 4, 1, 2, 4, 2, 2, 4, 1, 4, 2, 4, 2, 3, 2, 2, 3, 1, 3, 3, 2, 4,\n", | |
" 4, 1, 2, 1, 4, 4, 1, 2, 3, 2, 4, 1, 4, 4, 2, 4, 2, 2, 1, 2, 4, 3,\n", | |
" 1, 4, 2, 3, 2, 3, 1, 4, 2, 1, 2, 2, 4, 3, 2, 1, 2, 2, 4, 3, 2, 1,\n", | |
" 1, 1, 3, 1, 2, 4, 3, 2, 4, 1, 3, 2, 2, 2, 4, 1, 1, 4, 2, 4, 3, 2,\n", | |
" 2, 3, 1, 2, 1, 4, 3, 3, 4, 2, 4, 4, 2, 3, 2, 2, 4, 4, 1, 3, 4, 4,\n", | |
" 3, 1, 3, 4, 3, 3, 3, 4, 1, 3, 2, 3, 1, 1, 2, 1, 4, 3, 3, 2, 3, 1,\n", | |
" 1, 4, 1, 2, 3, 1, 1, 4, 2, 4, 3, 1, 4, 4, 3, 4, 2, 3, 4, 2, 1, 4,\n", | |
" 2, 4, 4, 2, 3, 1, 1, 1, 4, 3, 2, 1, 4, 4, 2, 1, 1, 3, 4, 1, 3, 2,\n", | |
" 2, 4, 1, 4, 1, 4, 3, 2, 3, 3, 4, 4, 3, 2, 1, 3, 1, 3, 3, 4, 3, 4,\n", | |
" 1, 4, 3, 2, 4, 3, 2, 1, 2, 4, 4, 1, 1, 2, 2, 2, 1, 4, 1, 1, 4, 2,\n", | |
" 1, 2, 4, 3, 3, 2, 2, 3, 4, 1, 1, 2, 4, 4, 3, 3, 4, 4])" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"cgmlst_profiles" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Reshape into 2D array" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# One row per profile (one per lowercase letter), one column per marker.\n", | |
"# reshape() is preferred over assigning to .shape, which NumPy discourages.\n", | |
"cgmlst_profiles = cgmlst_profiles.reshape(len(ascii_lowercase), n_markers)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[3, 1, 2, 3, 2, 4, 1, 3, 4, 1],\n", | |
" [4, 3, 4, 2, 3, 3, 2, 1, 2, 3],\n", | |
" [2, 1, 2, 4, 1, 2, 4, 2, 2, 4],\n", | |
" [1, 4, 2, 4, 2, 3, 2, 2, 3, 1],\n", | |
" [3, 3, 2, 4, 4, 1, 2, 1, 4, 4],\n", | |
" [1, 2, 3, 2, 4, 1, 4, 4, 2, 4],\n", | |
" [2, 2, 1, 2, 4, 3, 1, 4, 2, 3],\n", | |
" [2, 3, 1, 4, 2, 1, 2, 2, 4, 3],\n", | |
" [2, 1, 2, 2, 4, 3, 2, 1, 1, 1],\n", | |
" [3, 1, 2, 4, 3, 2, 4, 1, 3, 2],\n", | |
" [2, 2, 4, 1, 1, 4, 2, 4, 3, 2],\n", | |
" [2, 3, 1, 2, 1, 4, 3, 3, 4, 2],\n", | |
" [4, 4, 2, 3, 2, 2, 4, 4, 1, 3],\n", | |
" [4, 4, 3, 1, 3, 4, 3, 3, 3, 4],\n", | |
" [1, 3, 2, 3, 1, 1, 2, 1, 4, 3],\n", | |
" [3, 2, 3, 1, 1, 4, 1, 2, 3, 1],\n", | |
" [1, 4, 2, 4, 3, 1, 4, 4, 3, 4],\n", | |
" [2, 3, 4, 2, 1, 4, 2, 4, 4, 2],\n", | |
" [3, 1, 1, 1, 4, 3, 2, 1, 4, 4],\n", | |
" [2, 1, 1, 3, 4, 1, 3, 2, 2, 4],\n", | |
" [1, 4, 1, 4, 3, 2, 3, 3, 4, 4],\n", | |
" [3, 2, 1, 3, 1, 3, 3, 4, 3, 4],\n", | |
" [1, 4, 3, 2, 4, 3, 2, 1, 2, 4],\n", | |
" [4, 1, 1, 2, 2, 2, 1, 4, 1, 1],\n", | |
" [4, 2, 1, 2, 4, 3, 3, 2, 2, 3],\n", | |
" [4, 1, 1, 2, 4, 4, 3, 3, 4, 4]])" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"cgmlst_profiles" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Compute distance matrix\n", | |
"\n", | |
"The Hamming distance metric is the proportion of alleles that differ between two profiles" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"dm = sp.spatial.distance.pdist(cgmlst_profiles, metric='hamming')" | |
] | |
}, | |
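{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Condensed distance matrix output: a flat array holding each pairwise distance once (the upper triangle of the square distance matrix, row by row)" | |
] | |
}, | |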
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([1. , 0.8, 0.7, 0.7, 1. , 0.9, 0.8, 0.7, 0.7, 0.9, 0.7, 0.7, 0.8,\n", | |
" 0.7, 0.6, 0.9, 0.8, 0.7, 0.8, 0.8, 0.8, 1. , 0.6, 1. , 0.6, 0.9,\n", | |
" 0.8, 0.7, 0.8, 0.6, 0.7, 0.6, 0.8, 0.8, 0.8, 0.8, 0.8, 0.6, 1. ,\n", | |
" 0.9, 0.6, 0.7, 0.9, 0.9, 0.9, 0.5, 0.8, 0.5, 0.8, 0.7, 0.7, 0.7,\n", | |
" 0.8, 0.7, 0.7, 0.5, 0.8, 0.8, 0.7, 0.9, 0.8, 0.8, 0.6, 0.8, 0.8,\n", | |
" 0.5, 0.7, 0.8, 0.8, 0.8, 0.8, 0.8, 0.7, 0.9, 0.9, 0.6, 0.6, 0.7,\n", | |
" 0.8, 1. , 0.7, 0.8, 0.7, 0.7, 0.5, 0.9, 0.8, 0.9, 0.7, 0.8, 0.6,\n", | |
" 0.8, 0.8, 1. , 0.7, 0.9, 0.5, 0.6, 0.6, 0.9, 0.8, 0.9, 0.9, 0.4,\n", | |
" 0.9, 0.6, 0.7, 0.4, 0.7, 0.7, 0.8, 0.6, 1. , 0.9, 0.7, 0.5, 0.9,\n", | |
" 0.8, 0.9, 0.8, 0.9, 0.8, 0.8, 0.8, 0.8, 0.5, 0.8, 0.8, 0.6, 0.8,\n", | |
" 0.7, 0.4, 0.8, 0.6, 0.7, 0.7, 0.6, 1. , 0.7, 0.7, 0.8, 1. , 0.9,\n", | |
" 0.8, 0.9, 0.7, 0.7, 0.6, 0.9, 0.6, 0.6, 0.6, 0.3, 0.7, 0.8, 0.9,\n", | |
" 0.8, 0.6, 0.8, 1. , 0.5, 0.9, 0.8, 0.6, 0.7, 0.6, 0.7, 0.9, 0.9,\n", | |
" 0.8, 0.7, 0.8, 0.7, 0.8, 0.8, 0.8, 1. , 0.7, 0.9, 0.9, 0.7, 0.5,\n", | |
" 0.7, 1. , 0.9, 0.5, 0.6, 0.7, 0.7, 0.8, 0.9, 0.7, 0.8, 0.8, 0.8,\n", | |
" 0.5, 0.9, 0.7, 0.9, 0.7, 0.8, 0.9, 0.8, 1. , 0.9, 0.6, 0.9, 0.7,\n", | |
" 0.8, 0.5, 0.8, 0.3, 0.8, 0.9, 1. , 0.6, 0.9, 0.9, 0.9, 0.9, 1. ,\n", | |
" 0.7, 0.7, 0.8, 1. , 0.3, 0.8, 0.7, 0.6, 0.7, 0.9, 0.8, 0.7, 0.4,\n", | |
" 0.8, 0.7, 1. , 0.6, 0.9, 1. , 0.9, 0.8, 0.8, 0.9, 0.5, 0.8, 0.9,\n", | |
" 1. , 0.6, 0.6, 0.9, 0.8, 0.8, 0.5, 0.7, 0.7, 0.9, 0.8, 0.5, 0.9,\n", | |
" 0.7, 0.6, 0.7, 0.8, 0.8, 0.8, 0.7, 1. , 0.9, 0.9, 0.9, 0.8, 0.8,\n", | |
" 0.9, 1. , 0.6, 0.9, 0.8, 0.8, 0.9, 0.9, 0.9, 0.8, 0.5, 0.7, 0.7,\n", | |
" 0.9, 1. , 0.9, 0.8, 0.9, 0.9, 0.8, 0.8, 0.8, 0.9, 0.7, 0.6, 0.7,\n", | |
" 0.6, 0.5, 0.8, 0.7, 0.5, 0.7, 0.6, 0.7, 0.8, 0.5, 0.5, 0.7, 0.7,\n", | |
" 0.8, 0.8, 0.5, 0.8, 0.8, 0.6, 0.7, 0.9, 0.6, 0.7, 0.7, 0.6, 0.5])" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dm" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Hierarchical clustering of the distance matrix\n", | |
"\n", | |
"`linkage` performs single-linkage clustering by default" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"Z = linkage(dm)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[10. , 17. , 0.3, 2. ],\n", | |
" [11. , 26. , 0.3, 3. ],\n", | |
" [ 6. , 24. , 0.3, 2. ],\n", | |
" [25. , 27. , 0.4, 4. ],\n", | |
" [ 4. , 18. , 0.4, 2. ],\n", | |
" [14. , 30. , 0.4, 3. ],\n", | |
" [ 5. , 22. , 0.4, 2. ],\n", | |
" [15. , 29. , 0.5, 5. ],\n", | |
" [13. , 33. , 0.5, 6. ],\n", | |
" [31. , 34. , 0.5, 9. ],\n", | |
" [ 7. , 35. , 0.5, 10. ],\n", | |
" [ 8. , 36. , 0.5, 11. ],\n", | |
" [19. , 37. , 0.5, 12. ],\n", | |
" [ 2. , 38. , 0.5, 13. ],\n", | |
" [ 9. , 39. , 0.5, 14. ],\n", | |
" [16. , 40. , 0.5, 15. ],\n", | |
" [ 3. , 41. , 0.5, 16. ],\n", | |
" [32. , 42. , 0.5, 18. ],\n", | |
" [ 1. , 43. , 0.5, 19. ],\n", | |
" [28. , 44. , 0.5, 21. ],\n", | |
" [20. , 45. , 0.5, 22. ],\n", | |
" [12. , 23. , 0.5, 2. ],\n", | |
" [ 0. , 46. , 0.6, 23. ],\n", | |
" [47. , 48. , 0.6, 25. ],\n", | |
" [21. , 49. , 0.6, 26. ]])" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Z" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Dendrogram of hierarchical clustering of random cgMLST profiles" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEACAYAAABI5zaHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAGFpJREFUeJzt3X+cXXV95/HXh4SAGBQsMUh+GJRYGxGHmkKV3TWK0UQ0YVExxF+06kAlSqXrFmulNtii2LrutnlsHdSH2t0RqMujTbehuFuJa+sDTWhGaWDTTVOVAI6R0trZdh0Dn/3jnKmX4c69587cSYYvr+fjMY+559zv93y/59f7fO+5c+9EZiJJKssxR7sDkqT+M9wlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBZp/tBo+5ZRTcsWKFUereUl6XLrzzju/n5mLupU7auG+YsUKdu/efbSal6THpYj4dpNy3paRpAIZ7pJUIMNdkgpkuEtSgQx3SSpQo3CPiHURsS8i9kfE1VOUuTgi7o6IvREx3N9uSpJ60fVPISNiHrANWAscBHZFxPbMvLulzErgfcB5mflQRDx9tjosSequycj9HGB/Zh7IzHHgRmDjpDLvALZl5kMAmfm9/nZTktSLJh9iWgLc2zJ9EDh3UpnnAETEXwDzgA9m5p9Op0NDQzDcp5s6DzwAo6MzX87AQH+WNTAws/qbN8Pg4MyWIemJoV9vqM4HVgJrgEuAGyLipMmFImIwInZHxO5Dhw61XdDwMIyM9KdTo6MwNjb3ljUdIyP9u+hJKl+Tkft9wLKW6aX1vFYHga9l5o+Av42Iv6YK+12thTJzCBgCWL16dU7V4MAA7NzZoGddrFlT/Z5ry5pJ+5LURJOR+y5gZUScHhELgE3A9kll/pBq1E5EnEJ1m+ZAH/spSepB13DPzMPAFuA24B7g5szcGxFbI2JDXew24MGIuBu4HXhvZj44W52WJHXW6FshM3MHsGPSvGtaHidwVf0jSTrK/ISqJBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAjUK94hYFxH7ImJ/RFzd5vlLI+JQRIzUP2/vf1clSU3N71YgIuYB24C1wEFgV0Rsz8y7JxW9KTO3zEIfJUk96hruwDnA/sw8ABARNwIbgcnhflQMDcHwcPvnRkaq32vWPPa5zZthcHDWugV07luvvv51GB+Hk07qz/IWL4bR0ZkvZ2AAHnhg5ssaGKh+T2dZ4+OwYMHcrTM+DgsX9l4HmtdrrTOhlzan0q/jZLKJ/d3OkToGOvWhnSORGf3U5LbMEuDelumD9bzJXhsR34yIL0TEsnYLiojBiNgdEbsPHTo0je4+1vDwj0N8soGB9jtwZKR/odtJp771qtcDt5vRURgbK2NZ4+Nzu87DD8/NOk30c9/OZpvT2Te9OFKZ0U9NRu5N/DHw+cz8YURcBnwWeNnkQpk5BAwBrF69OvvUNgMDsHNn8/LtRvKzpde+TWWiz/1YVr+Xd7SXZZ3+Hx+zvdx+tznb/TySmdEvTUbu9wGtI/Gl9bx/kZkPZuYP68lPAi/sT/ckSdPRJNx3ASsj4vSIWABsAra3FoiIZ7RMbgDu6V8XJUm96npbJjMPR8QW4DZgHvDpzNwbEVuB3Zm5HXh3RGwADgN/B1w6i32WJHXR6J57Zu4Adkyad03L4/cB7+tv1yRJ0+UnVCWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZL
hLUoEMd0kqkOEuSQUy3CWpQI3CPSLWRcS+iNgfEVd3KPfaiMiIWN2/LkqSetU13CNiHrANWA+sAi6JiFVtyp0IXAl8rd+dlCT1psnI/Rxgf2YeyMxx4EZgY5ty1wIfAf5fH/snSZqG+Q3KLAHubZk+CJzbWiAifhpYlpl/EhHv7WP/NJcMDcHw8GPnj3y8+r3mFx/73ObNMDg4u/1Sz4buv5/h0dGu5UbGzgBgzZ79PDA+zuj4+JRlBxYubDt/8+LFDJ522vQ6OovuH7qf0eHu2wBgbKTaDnvW7G9UfvHmxZw2eHTXuUm4dxQRxwAfAy5tUHYQGARYvnz5TJvWkTY8DCMjMDDwqNk7B9qEOlRlwXCfg4ZHRxkZG5sykCcM3PDjMBsdH2fs4YdZOG9e43ZGxsYA5mS4jw6PMjYyxsKBztsA4IaBZqEOMDZSrfPjIdzvA5a1TC+t5004ETgT2BkRAKcC2yNiQ2bubl1QZg4BQwCrV6/OGfRbR8vAAOzc2azsmjWz2RPN0MDChew8++zG5dfs2QMwrTpz1cKBhZy9s/n6NLFnzdxY5yb33HcBKyPi9IhYAGwCtk88mZn/kJmnZOaKzFwB3AE8JtglSUdO13DPzMPAFuA24B7g5szcGxFbI2LDbHdQktS7RvfcM3MHsGPSvGumKLtm5t2SJM2En1CVpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUoEbhHhHrImJfROyPiKvbPH95RNwVESMR8ecRsar/XZUkNdU13CNiHrANWA+sAi5pE97Dmfn8zBwArgc+1veeSpIam9+gzDnA/sw8ABARNwIbgbsnCmTmD1rKPxnIfnbyiWToziGG7xp+zPyR734cgDWf+cW29TY/fzODLxyc1b5JT3T3D93P6PBoxzJjI2MA7FmzB4DxB8YZHx1vW3bhwMK28xdvXsxpg6fNoKfNwn0JcG/L9EHg3MmFIuIK4CpgAfCydguKiEFgEGD58uW99vUJYfiuYUa+O8LAqQOPmj9wdftQBxj57giA4S7NstHhUcZGxqYMZXhsYI+PjvPw2MPMWzivURsTF4cjEe6NZOY2YFtEbAZ+FXhrmzJDwBDA6tWrHd1PYeDUAXZeurNx+TWfWTNrfZH0aAsHFnL2zrMbl58YwTetM1F+ppq8oXofsKxlemk9byo3AhfOpFOSpJlpEu67gJURcXpELAA2AdtbC0TEypbJC4D/078uSpJ61fW2TGYejogtwG3APODTmbk3IrYCuzNzO7AlIl4O/Ah4iDa3ZCRJR06je+6ZuQPYMWneNS2Pr+xzvyRJM+AnVCWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQI3CPSLWRcS+iNgfEVe3ef6qiLg7Ir4ZEX8WEc/sf1clSU11DfeImAdsA9YDq4BLImLVpGJ7gNWZeRbwBeD6fndUktRck5H7OcD+zDyQmePAjcDG1gKZeXtm/lM9eQewtL/dlCT1okm4LwHubZk+WM+bytuAW2fSKUnSzMzv58Ii4k3AauAlUzw/CAwCLF++vJ9NS5JaNBm53wcsa5leWs97lIh4OfB+YENm/rDdgjJzKDNXZ+bqRYsWTae/kqQGmoT7LmBlRJweEQuATcD21gIRcTbwCapg/17/uylJ6kXXcM/Mw8AW4DbgHuDmzNwbEVsjYkNd7KPAQuAPImIkIrZPsThJ0hHQ6J57Zu4Adkyad03L45f3uV+SpBnwE6qSVCDDXZIKZLhLUoEMd0kqkOEuSQ
Uy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIK1CjcI2JdROyLiP0RcXWb5/9NRPxlRByOiNf1v5uSpF50DfeImAdsA9YDq4BLImLVpGLfAS4FhvvdQUlS7+Y3KHMOsD8zDwBExI3ARuDuiQKZ+a36uUdmoY+SpB41uS2zBLi3ZfpgPa9nETEYEbsjYvehQ4emswhJUgNH9A3VzBzKzNWZuXrRokVHsmlJekJpEu73ActappfW8yRJc1STcN8FrIyI0yNiAbAJ2D673ZIkzUTXcM/Mw8AW4DbgHuDmzNwbEVsjYgNARPxMRBwEXg98IiL2zmanJUmdNflrGTJzB7Bj0rxrWh7vorpdI0maA/yEqiQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVqFG4R8S6iNgXEfsj4uo2zx8XETfVz38tIlb0u6OSpOa6hntEzAO2AeuBVcAlEbFqUrG3AQ9l5hnAfwA+0u+OSpKaazJyPwfYn5kHMnMcuBHYOKnMRuCz9eMvAOdHRPSvm5KkXjQJ9yXAvS3TB+t5bctk5mHgH4Cf6EcHJUm9m38kG4uIQWCwnhyLiH1Tl+112dPpzxyu83O9V5pWnem8vjoCO2dO75uC6kxr9x+pOkdom83ZjTB1+Wc2qd4k3O8DlrVML63ntStzMCLmA08FHpy8oMwcAoaadEySNH1NbsvsAlZGxOkRsQDYBGyfVGY78Nb68euAL2Vm9q+bkqRedB25Z+bhiNgC3AbMAz6dmXsjYiuwOzO3A58Cfj8i9gN/R3UBkCQdJeEAW5LK4ydUJalAhru6iogTjnYfJPXGcFdHEbEB+J36zfTZbmvxbH34LSKWRcQNs7FsaS6ac+Fe/ymlehARz4yI4xuUO6b+3ShAI+IngHdTfZ3E0oh42ow62rmtJcCvUn29Rd8DPjPvBX43Ip7V72X3w3SO+4n9eaSU9KnziBjooey0MulI75/J5lS4R8R5wBvrx7N2IEXEq+rg6qXOk6bRzr+KiMFe1iUiTu2x/NOB99LgE8GZ+Uj98JkRMb9BO+PAYeDXgI8Bj3Qu3rZ/5zUsej9wJ3A2cFE/t9nEc5n5DeA/R8SepsuetJyfalBmwcR3L0XE+RHxjAZ1FgG39HrMT+zPiFjZa91p3mo7ra7bOOwi4qm9NjKd9elx+W+h2t7PbVD2FGD/dAY2LfvnwibHQUubV0TEul7bm2zOhHu9M88FXgrQy9/JR8TGiHh5RCzsUm7ioHwf8LIelr8FuD4irmtysLZcsZ8FnAW8qcnBGhFnAVuB1/ZwcH8fWA68q8NyXxwRm+rH7wJuAT4NXN1pdJGZ/wh8ieq7g76ZmX/fY+ieCLwtIi7uUi7q/X0M1ZfT/TKwsV/brPVYysxXAvdGxP9quh51O78AfDQiFncpuhz4eET8PnAV1QWyo8w8RPXnw2ubhEib/fknwKci4nUNt9nlwLaI+EREvLRJ0NfnwO9FxIeBd0bEcQ3qvBO4PCKe0q3spHZ6XZ/zIuLSiPjZbqPliLgSuBL4R+DkbsvOzO9TnVtfjYiu5es2/mX/1K6k+jPyJnU3AucDdzcp38mcCPeIOKE+Af8TcEa9g5vW3QT8HrAWuK3LybG8/n078MO6fseDpz5AXw98GPh5qvvPK7t069n17/8CfIVqNPqWLqPL1wC/AzyP6isaOgZ8RCyJiJ+sRwdbgMUdRiInA9dFxLXAi+r1+QzwJODDXU6Im6jC/fUR8Z6mF92IWFVfHL4CdNxemZkR8Uaqk+hXgK9SXeS7bYOJbXYmzbbZMX
V7G6i+/uLLDddlA3A5cEVmjnZZl/3AN6m22a2Z+WBEzOt2nGXmP1Htj7si4qQuXZrYnx8EXkD1ja1fpdq33Y6z1wJXAP8RGAPW0eWVUkRcCFwMvJlqAPaczPxhpw5GxGVUH2wczswfNBnt19v5rB7X58XAJ6mOl8uB357qeI6Il1J9yPJFwA3A6d36BJCZfwy8B9jdMOBPBn6zJeBPoNk38C4BfhcYy8zvRLNX11PLzKP6Q7VTfh14dT39CuCDVAd6dKm7HHgD8Ox6+k3AbuBpbcqeCeyjur0wAnwN+GmqMD2hXVvAU6gOnGdQ3Xv+U+BzwK3Ayg59+hbw5np6PtVJ8UWqi0O7dhYDfw48t56+vN7JF05R/slUX638JapQex7VBe78+vl2ddYCfwX813p6AVXofhY4s8F+Ohv4a2BLg7IvAr4NXAY8h+pEfUeXOluB97b07V3ATqpQabc+T6+32apJ22xjp+MGOKbl8Q7gyw3W53LgV+rHxzYof0Z9LP4l8MaW+Qsb1F0H/A1wcpdya4G7gBtattkm4Lfr7d52G1BdPP9dS523U33T64IObb0VuAi4tD6OF9TznzNF+ScBfwi8kup24S/Ux+c7O7SxBPgO1YckG60P1TfW3g78bD29AvgQcNUUbZwCnFI/vg74SP34ImBDg32zvsm+aSk7cZG/tj5en0IV8lPWr/syClzcMq9jDk65rOlU6ucP1a2Lt1O9DHkP1ahiJ3Bul3rvpgrou+v6x9fz39huB9Qb9dnAc+sd+whVsP0PqlsUx0/RznFUo6PbJzY01a2QrVOdEMBrqE7sS1rm3Qr8FvDUNuVPpgrA8+rpY+uT4cvAuinaOJ7q4nQT8P76gNgFLOmwzTYCDwFvaJl3C/D6hvvqrHrdL+tQZgHV9w99td4GG+oT9C7ghR3qXUgVCM9rmXcH1Zu5J06xze4A/vWkbfYXVKOzpgF/y8RJ3qH8eqoL+0+2zHszcGGXeq8BvgFcQDVo2QrMb7Cd1wP/mzaDlDb783vAppZj/K3Ab7Y7zlq28x9RXxTreX8G/FSHdl5Sn1NfmXT+Xc8UFzuqQcceqq8muQ54J9X/heh0EbmI6r2XRutDdYF7mB9feOcDrwI+2WAbb6R6xXt+fd60HaxNd9/UZS+oy05kzc76ePs8cEKHeq+mujBc3KRPUy5nJpX7+UM1wvsA1ZX3EarQmt/uJK0P0M/Vda6neom5ZuLEoRrNn96hrSdRjeCfRnUvbMpArMuvpLq98Px6w98ELO9S51X1Dvr5+qD9InBah/K/VK//mfX0K4H/TnX75LgO9Z5KNSr4APDfgBfV86caub0aOED16uhCqtB9dg/76cypylON2H+d6pXEs+oT+1LgHfU+/QAwb4q6JwG/Uf+cX/fzf3baN1T3tK9p2WavqLfBR6ku8l0Dvt4/n6VD6FKNuK6lCqlXA5dQBcIZDbbXuvo42E1LoDaot5Hq4nhMl3IX1MtvDcTHXAwnbecP1dt5LdXFdxf1iHaKOgvr8+W36vPsLVRvfk/5io9q8PEz1CFINQq/nQ6hNs312Qjspx5IUV2Ivl6fE532/7nAP1O9in9epz5Nd9/UZc+nushdUa/Lk4FlDeqtp/oa9X/bS98etYzpVpyNH6pR8vFUfxI31Uu+iZdvn2o5iK6luve6ttNJWpePegPvoh71NezXL1ON8vc2PUnrA20n1cv/F3Qpu7Rej1vrE28f1cvO7d3qtizj/cBQg3IXUo14/gh4Vh/331KqV1G7qULzMuCi+rm30eUiQvXXGFuobjd9ETirQXsfqsv+BtVto1fUbX+4Uyi0LGNNk/1JdWvu8npffr5b3ybVXQQsmsb27Hobpy43EQSva1h+Yjt/kWok2fX4qtf/Mqo3Oz8HPL9hW8fU+/4uGtz+m+b6vAb4AdU/CrqZZrdYnkb1SrHRiH26+6Yu+0qq/4PR00i8zrNpn59z6rtlWv5qolu5i6jur/5SZn6+frPmeq
rR4TVZvTnVbRn/nur+8+SvL56q/LHAqcAjTevU9U6ges/wnxuUfQrwYqrbQDuo3gu4AVibHd7Im9hu9Rs4P0d1u6BjexHxEuDbmfmtpuvSVES8gGqUeyJVqHX9k7NJ9Z9MNeoaa1D2RKpXDGcCezLz9qj+NeSJmfn3vfe+a3sLALL6r2RzRkSsBf4mMw/0UGfivab/20OdYwEy80c9tPEG4I7MvKeHdnpan/rN2K1U5/RHJ96I7JQnEXFcdnljuF+ms39m3OZcCvdeRMQFVAFyXUvAn5zVn5U1qT8/q/8aNSfV7+xfR3V/+xsNygfVLYO/zcy/mu3+NejP06lekl5J9RL7W0eo3XmZ+fCRaEvNNB209aGdV1C9f/buzLxlttub6x634Q4QEeup/vnHVZn5B0e7P/0U1YceFmTmt492X2YiIo5tOsqTZupojJDnqsd1uIM7U5LaedyHuyTpsebEJ1QlSf1luEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QC/X8r2NMi3RJj4QAAAABJRU5ErkJggg==\n", | |
"text/plain": [ | |
"<Figure size 432x288 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"dend = dendrogram(Z, leaf_label_func=lambda x: ascii_lowercase[x])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Complete linkage with Hamming distances might produce a nicer looking tree with distances that make sense to most people." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEACAYAAABI5zaHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAGKxJREFUeJzt3Xu4XXV95/H3l3ATDyA2GCQhghKqAfHEZsDLzONhEA2oCeOFQrwUpR6oRJ3iOKVeqBO1KLadTpWpPVQfRmcOl3Z4bGYaxJlK1KkDQ2iOIDD0SWmVED2gVdszdkzR7/yx1hk2J/uy9sk+l/zyfj1Pnpy9z2/t9dtr/dZn/fbaZ+9vZCaSpLIctNAdkCQNnuEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKtDBC7XipUuX5oknnrhQq5ek/dLdd9/9vcw8tle7BQv3E088ke3bty/U6iVpvxQR32rSzssyklQgw12SCmS4S1KBDHdJKpDhLkkF6hnuEfHZiHg0Ir7Z4fcREb8XETsj4p6IeOHguylJ6keTmfv1wLouvz8XWFX/GwV+f9+7JUnaFz3DPTO/CvxtlyYbgM9l5Q7gaRHxzEF1UJLUv0F8iGk58HDL7V31fd8ZwGPvk7ExGB9f6F7sv77zHZicXOhezL/h4blfx2LatvPxfJvYuBFGRxe6F+WY1zdUI2I0IrZHxPbHHntsztc3Pg4TE3O+mmJNTsLU1EL3okxu2yebmHAiNmiDmLk/ApzQcntFfd9eMnMMGANYu3ZtDmDdPQ0Pw7Zt87Gm8oyMVP+7/QbPbftk09tDgzOImfsW4C31X828CPhRZi74JRlJOpD1nLlHxA3ACLA0InYBvwEcApCZnwa2AucBO4EfA2+dq85KkprpGe6ZeVGP3ydw+cB6JEnaZ35CVZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAINosye5sFCFPuerj873yXQLJS8sBbrWJuLouKDLA6+2MatM/f9xEIU+x4eHuzgb8JCyQtvsY61xVxUfDGOW2fu+5EDodi3hZIXh8U41hZzUfHFOG6duUtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAjUK94hYFxEPRsTOiLiyze9XRsTtEbEjIu6JiPMG31VJUlM9wz0ilgDXAucCq4GLImL1jGYfAG7OzDXAhcC/H3RHJUnNNZm5nwHszMyHMnMPcCOwYUabBI6qfz4a2D24LkqS+tWkEtNy4OGW27uAM2e0+RDwpYh4J/BU4OUD6Z32C4OsuTkXdVsXW23L+TSbfTPbfXAgb+fFaFBvqF4EXJ+ZK4DzgM9HxF6PHRGjEbE9IrY/9thjA1q1Ftoga24Oum7rYqxtOZ9ms29msw8O9O28GDWZuT8CnNBye0V9X6tLgHUAmfk/I+JwYCnwaGujzBwDxgDWrl2bs+yzFqHFWHMTFmdty/k2H/vG7bz4NJm53wWsioiTIuJQqjdMt8xo823gbICIeB5wOODUXJIWSM9wz8zHgU3AbcADVH8Vc19EbI6I9XWz9wBvj4hvADcAF2emM3NJWiBNLsuQmVuBrTPuu6rl5/uBlw62a5Kk2fITqpJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBWo0fe5LwYW+tV8jQH3/4GtlKzZb2buFvrVfIwB979KyZr9ZuYOFvrV3I8B97+gjKzZb2bukqTmDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVKBG4R
4R6yLiwYjYGRFXdmhzQUTcHxH3RYTlDiRpAfUs1hERS4BrgXOAXcBdEbElM+9vabMK+HXgpZn5g4h4xlx1WJLUW5OZ+xnAzsx8KDP3ADcCG2a0eTtwbWb+ACAzHx1sNyVJ/WhSZm858HDL7V3AmTPanAIQEX8OLAE+lJlfHEgPCzS2ezfjk5N9LTMxdTIAIzt27vW77+zZw+SePW2XGx4a6viYG5ctY/T44/vqRynGxsYYb1PAcmLidwEYGfmXbZfbuHEjo1bPnlO7x3YzOb738TE1UR0DO0b2PgYAlm1cxvGjB+Z4bmdQNVQPBlYBI8AK4KsR8fzM/GFro4gYBUYBVq5cOaBV73/GJyeZmJrqGrwzDV/XfkADTO7Zw9RPf8rQkiWNH29iagrggA338fFxJiYmGJ5R1Xh4uH2oA0zUVZMN97k1OT7J1MQUQ8NPPj6uG+58DExNVOPZcH9Ck3B/BDih5faK+r5Wu4A7M/Mfgb+OiL+kCvu7Whtl5hgwBrB27dqcbadLMDw0xLY1awbyWCM7dgD09XjTyxzIhoeH2dZHFeQRq2fPm6HhIdZsaz6ed4w4nmdqcs39LmBVRJwUEYcCFwJbZrT5AtWsnYhYSnWZ5qEB9lOS1Iee4Z6ZjwObgNuAB4CbM/O+iNgcEevrZrcB34+I+4Hbgfdm5vfnqtOSpO4aXXPPzK3A1hn3XdXycwJX1P8kSQvMT6hKUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAINqsze/mVsDNrUzwSgrqFJuxqaGzeCJdbUh7G7xxi/t02t1u/WtVqv71Cr9fkbGf2FxTXWutX+7Vbjd7HW6t29e4zJyb33zdRUtW927Gi/b5Yt28jxxzffN7Op1zuIWr0HZriPj8PEBMyonwmwrVMNzbp+puGufozfO87EdycYPm5GrdYru9Rq/W5dq3WRhXu32r+davwu5lq9k5PjTE1NMDT05H1z3XWd983UVLVv+gn3fuv1DqpW74EZ7lAFex/1M7F+pmZp+Lhhtl28rXH7ketH5qwv+6rf2r+LvVbv0NAwa9Zsa9x+x46RWa2nn3q9g6rV6zV3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCtQo3CNiXUQ8GBE7I+LKLu1eFxEZEWsH10VJUr96hntELAGuBc4FVgMXRcTqNu2OBN4N3DnoTkqS+tNk5n4GsDMzH8rMPcCNwIY27T4MfBz4vwPsnyRpFpqU2VsOPNxyexdwZmuDiHghcEJm/mlEvHdfOjSbgsKLsZhwiXaP7WZyfO8CyVMTVXHkHSN719FctnEZx482r5/ZqWgxdC9c3G/RYql0+1xDNSIOAn4HuLhB21FgFGDlypVt2/RbUHixFhMu0eT4JFMTUwwNP7lA8nXD7YsjT01UxZH7CfdORYuhc+Hi2RQtlkrXJNwfAU5oub2ivm/akcBpwLaIADgO2BIR6zNze+sDZeYYMAawdu3a7LTCfgoKL+ZiwiUaGh5izbZmBZJ3jMyuOPJ8FS2WStbkmvtdwKqIOCkiDgUuBLZM/zIzf5SZSzPzxMw8EbgD2CvYJUnzp2e4Z+bjwCbgNuAB4ObMvC8iNkfE+rnuoCSpf42uuWfmVmDrjPuu6tB2ZN+7JUnaF35CVZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpALtcw1VdTa2ezfjk3sXlJ6YqmqLjuxoX4Zu47JljB7fvO6oxNgYjLcpLD5RFRVnpE392Y0bYdS6s6Uy3OfQ+OQkE1NTDA89uaD0zNutpoPfcFdfxsdhYgKGn1xYfNtw+6LiTFRFxQ33chnuc2x4aIhta5oVlIbOs3mpp+
Fh2LatWduRkbnsiRYBr7lLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUoEbhHhHrIuLBiNgZEVe2+f0VEXF/RNwTEX8WEc8afFclSU31DPeIWAJcC5wLrAYuiojVM5rtANZm5unAHwPXDLqjkqTmmszczwB2ZuZDmbkHuBHY0NogM2/PzB/XN+8AVgy2m5KkfjQJ9+XAwy23d9X3dXIJcOu+dEqStG8GWkM1It4ErAVe1uH3o8AowMqVKwe5aklSiyYz90eAE1pur6jve5KIeDnwfmB9Zv6k3QNl5lhmrs3Mtccee+xs+itJaqBJuN8FrIqIkyLiUOBCYEtrg4hYA/wBVbA/OvhuSpL60TPcM/NxYBNwG/AAcHNm3hcRmyNifd3sE8AQ8EcRMRERWzo8nCRpHjS65p6ZW4GtM+67quXnlw+4X5KkfeAnVCWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFahTuEbEuIh6MiJ0RcWWb3x8WETfVv78zIk4cdEclSc31DPeIWAJcC5wLrAYuiojVM5pdAvwgM08G/i3w8UF3VJLUXJOZ+xnAzsx8KDP3ADcCG2a02QD8h/rnPwbOjogYXDclSf1oEu7LgYdbbu+q72vbJjMfB34E/NwgOihJ6t/B87myiBgFRuubUxHxYMe2b+1v4t9v+7pD87LMbF7CLOZl+l5oXlYyu2Vm8wJzVsvMYnzOy5hexOO5tBX1O266tH9Wk+WbhPsjwAktt1fU97VrsysiDgaOBr4/84EycwwYa9IxSdLsNbkscxewKiJOiohDgQuBLTPabAF+qf759cCXMzMH101JUj96ztwz8/GI2ATcBiwBPpuZ90XEZmB7Zm4BPgN8PiJ2An9LdQKQJC2QcIItSeXxE6qSVCDDXT1FxBEL3QdJ/THc1VVErAc+Wb+ZLh1QIuKEiLhulssuW8gPcy6KcK//fHK2yw4Psi8LLSKeFRGHz8N6em7ziPg54F1UXyexIiKePg/9mtODISIO6mc9+zI258P089nf17FYZebDwKci4tn9LBcRy4EPUH1dy4IE/ILvtIhYCuycTXBExFuAWyLiuX0ut6rfDR4R/zQiRvtZLiKO67P9M4D30uPTvRFx6PT3+0TE2RHxzKbrqJd5KfDG+udu/dsDPA78BvA7wM/6WU/LuvpxfL1c41CNiKObts3M6efwrIg4uNvzj4hjqcbXvB2cEXFefVJtZPr5RMT5/YyDiLg8Itb1uY7ZHDd9X9KLiKf0u8ws1vG8Bm0CIDO/Afx+ROzoYxW7gbuBNcBr5zI3OlnwcM/M7wHvBL4eEcc0XS4i3g28G/h7oJ/lNgF/CnwmIl7fayO2zFqeDZwOvKnJho+I04HNwOv62FHfA1ZSbY9uVgK/GxGfB66gCuFG6r6cCZwF0O3zCJn598CXqb476J7M/GGfg/RI4JKIuKBh+03ApyPiY8A7IuKwBsu8A7gsIo7q0e4lEXFh/fM7gVuAzwJXdpqZZuZjVH/We85sX7VExIaIeHlEDPVoN30y+3Xgnzd43P//fGrvpvpT5UZ9As4G7m+6jnqbNT5u6mUuA66NiD+IiLOaBH09Bq6JiKubnrQj4qURcXFEvKjJq4yI+BXgExGxrFu71mMjM18JPBwRX23w+FEvexDVly3+GrBhDnOjrQUPd4DM/C/ArwLbmwR8RJxF9WGpFwPXASc1WU9U149Pp/qGy6/Xy7+lx0Z8Tv3/fwS+RnUm7rpMRLwG+CRwKtXXLXTdURGxPCJ+vp4hbQKWdXs1kpk7gX
uoQvfWzPx+RCxpcKI6oh50vwecXB9IvdxUr+cNEfGrTT+cFhGr65PD14BVDdqfD1wAvJnq5HNKZv6kxzKXUn14bjwz/67HbP8Y4OqI+DDVfn8DcD3wFOBjXQL+x3WbeyPiab2ex4z+XQh8GjgHuK3HCWJl/f/twE/q5bvtz2OA32wJ+CNo9i2vy4FPAVOZ+e0er16mt9mHgBfQx3ETEa8DLgf+HTAFrKPHDLY+Ub8B+BjwNqr3erqOnYh4CfCHVJOVy4Df7hbwdQZcBlyemZPdHrtlmYMAMnM91demfKVb+8zMiHgj1STtfVTb7Cx658B0bpxGg9zoKTMXzT+qwfNXwDE92i0FltY/Xw18vP75tcD6DsssB75N9SEsgOlP2/42cCn13/zPWGYl8DfAm+vbB1OFz5eoBl+7ZZYB/wN4bn37MqqD6fwO7Z9K9TXJX6536KlUgXB2/fu9lqnvPxl4E/AXwBtb7h/q0P4s4N8Ar65vvwL4EFVwtV3HjOXXAH8JbGrQ9sXAt+rtegrV4H57j2V+qd5/F9fb99D6/lM6tH8K8AXglVSXsX6l3m7v6LKOc4BvAv+pZQysovpG09N69G9dk7E5Y+z8IvCc+vabgO3A09u0PQ14kOrS1wRwJ/DCeiwc0WUMnMsTJ/kPA88AjqIK+Y79rLfzJHBBy32d1nEOcC9wXdPjpm73PuBftSzzy1TfKHtoh/ZHUYX0M6ne5/ki8DngVmBVh2XOoDoZvqi+fSLwEeCKLs/9MuB99c+HNNmXdduDWn7eCnylR/vNwHtbnv87gW1UE5h2OfAMqtxY3dLPT9X7tufx2bYPs1loLv/VA/Z/tzsIOrTfQDXbPZv6qxJ6DOrdwIXTO4wqVH4TOLrDMq+hCtCLWu67FfitdstQzXa+Drx0egBRhc5XgHUd1nF4fTDfBLy/PvDuApY3eP6vAb4BvIoqsDcDB7dp9+z6ALuf6lXS5fVgO7OPfXM61aWjS7u0OZTq+4e+Xm+39XUQ3Av8QpflXkYVnl9rue9dwDWdDkKqk+EOqq+/uBp4B1XtgbYB0jJefgD8Yst9twBvGNTYrPt9Z72tfxk4vL7/jbQ5QdTj8DnAc+vn8TOqE85/o7p0dHiXdb2q7tP0Mtvq53MDcESX5V5NdWK4oMHz3gA82udxcz7wJ9RhVd/3Z8DzuqznMKpXCLfXt6Meb5vb7VOqE89PeSKsDwbOA/6wxz78IvDzLfe9GTi/wXZoDfhbqCeVXZ7/F4BTW+67g+qPE45s0/6Y+vf/rL49nRt/TnWVou+A76vxfP2rB9NftG7MLm3PBP6BasZzaoP2r6oHdetA3Wtjz1jmvHqZt1GdIL4EHN+l/XuAD1LPBqlml/+V6jLAYV2WO5rqDP5B4D8DL54e5D36t67u3/bWg6lD21Pqx/9IHQg31QdFo8FDNct8ToffvZjq1cGpVCeTLVQz8bfX6/ogsKTDskNUM9ffAkaAt1C9IdVxRk11Uvwn1GFLNaO8nS6hVrd7NfAQ1SuX86lOPG2fU79js368z9Xb+RqqyxIj1Cdcqtn8SV0e/yn1dng61TX0Jif4s6lOcpfX4/mpwAkNljuX6qu6/8WgjxvgafUY+yhVCK+nmrAs7bGeVVSX8p5f76ebgJU99sdO6skX1SThf9XHUbsZ8lFUr3Kurh//orpfJzfc/wfV/7+N6mS610Sq5fl/tP53dr2u/95tf1K9f3YVT+TGK6hy4BNUE4O+Ar5xw/n+R4fLC23aPZ3qDNlxxt5lUL++j2VeRjUr2gq8oEfbFfUAurXeuQ9SvYTc0mvZlsd4PzDWR/+OBY5t2PYwqmD8AB0ue8xyn62gmqlurwf/pcBr699dQo8ApXpJfinVG3efA57fcL0H1Y9/Lz0ur7Qscz7VrO9PgGcPYmzyxKW/z9S3D6/HwSfrgGsbBC3LB1Uw30U9g+ujT6+kqrXQcyY+Y7lzmj7/fo8bqr982kQ1Gbqlydivx+avUb1quY
8ek5V6mdcAf0dVKOhmOlyanTHOLquP5RuA02cx1kd69a3l+X+53gZd11MfPx+p236U6jLoK+pj6WP0mITO/FfEd8tExGHZ4823NsucA/xVZj7UxzJHUL1f8g8N2h4FvITqZeZWqmun1wHnZJc3cqbfaa/fKHsr1cvFnuvrR8u7+XMiIl5ANTM6kuqE0++fqh4CkJn/2LD9EVQz4jsy84E+1vMy4FuZ+Tf99K/HY76W6lrpezLzhvpN3muoXrlcldUbtL0e419TvS8w86u1ey3X95ju1z4cN5GZ/6dh+0OA44CfNd0G9Rulm6m22yem34jsNs6j/mBeVhXm5kxEPJXq+U81aHsk1Svg04AdmXl7VKVOj8zMH/a13hLCfbGr/7rnaqpr1d9o0D6oXsb9dWZ+c677Nxei+pv9s6n+RO/CQQZoh/XN6QmrHxHxKqr9fXVLwB+T1Z9WNln+4KwqmqkPEfEKqvco3pWZtyx0fwYhIpZk5k9ntewiOR6KFtWHSw7NzG8tdF/mW0Qc0nQGXpKIOJeqMM0VmflHC92fA8V8vHrZXxju0hwxaLSQDHdJKtCi+ISqJGmwDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUoP8HqrkHRs1izB4AAAAASUVORK5CYII=\n", | |
"text/plain": [ | |
"<Figure size 432x288 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"dend = dendrogram(linkage(dm, method='complete'), leaf_label_func=lambda x: ascii_lowercase[x])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Complete linkage with Euclidean distances" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAW4AAAEACAYAAACTXJylAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAGAlJREFUeJzt3X+UZGV95/H3d2ZAkFbR0A4OiCOCGBWtibMSQ7I2zqIgCKxhzdBKgkoagxiI2UQTN5sEdI0mcfFET7KNGjSxiInBbI6rru5KS9SVdYapOAq4B4muOKFtz65H2rNnXcizfzy3oOmpH7e6u6r66X6/zunTv+6Pb92693Of+9T9ESklJEnl2DLuAiRJgzG4JakwBrckFcbglqTCGNySVBiDW5IKY3BLUmEMbkkqjMEtSYXZNoyJHnfccWnnzp3DmLQkbUj79+//Xkppss6wQwnunTt3sm/fvmFMWpI2pIj4Vt1h7SqRpMIY3JJUGINbkgpjcEtSYQxuSSqMwS1JhTG4JakwBrckFWYoF+BsJrOz0GyOuwptJtPTMDMz7io0Tra4V6nZhFZr3FVos2i1bCjIFveaaDRgbm7cVWgzmJoadwVaD2xxS1JhDG5JKkzf4I6I0yKiteTrBxFxzSiKkyQdrm8fd0rp60ADICK2At8BPjbkuiRJXQzaVbIH+EZKqfZ9YyVJa2vQ4N4L3DSMQiRJ9dQO7og4ErgA+Ksu/5+JiH0RsW9hYWGt6pMkLTNIi/tc4PaU0nynf6aUZlNKu1NKuycnaz02TZK0AoME9yXYTSJJY1cruCPiGOBs4ObhliNJ6qfWJe8ppR8CPzbkWiRJNXjlpCQVxuCWpMIY3JJUGINbkgpjcEtSYQxuSSqMwS1JhTG4JakwBrckFcbglqTCGNySVBiDW5IKY3BLUmEMbkkqjMEtSYUxuCWpMAa3JBXG4JakwtR6dJm0Wc3OQrM57ioe1mrl71NTYy3jMNPTMDMz7io2j7oPCz42Ij4aEXdFxJ0R8YJhFyatB83mw2G5HjQa+Ws9abXW185tM6jb4n438KmU0sURcSTw6CHW1Nd6agWttxaQLZ+112jA3Ny4q1i/1su6v5n0bXFHxOOAfw68HyCl9KOU0veHXVgv66kVtJ5aQLZ8pM2hTov7qcAC8KcR8VxgP3B1SumHQ62sD1tBh7Pls3mN8yh03Eedm/Eos04f9zbgJ4A/TintAn4IvHn5QBExExH7ImLfwsLCGpcpqZdxHoWO86hzsx5l1mlx3wvcm1K6rfr9o3QI7pTSLDALsHv37rRmFUqqZTMehW7Wo8y+wZ1Sui8ivh0Rp6WUvg7sAe4YfmnlG/Xh67gOWTfjoao0TnXPKnkD8OHqjJJ7gFcPr6SNo334OqrDyHEcrrZ3Fga3NDq1gjul1AJ2D7mWDWmjH75u1kNVaZy85F2SCmNwS1JhDG5JKozBLUmFMbglqTAGtyQVxuCWpMIY3JJUGINbkgrjo8skDc2w79czqvvzrLf78djiljQ0w77d7ChuKbsebx1ri1vSUJV+v571eD8eW9ySVBiDW5IKY3BLUmEMbkkqjMEtSYUxuCWpMAa3JBXG4JakwtS6ACcivgncDzwIPJBS8sHBkjQmg1w5eVZK6XtDq0SSVItdJZJUmLrBnYBPR8T+iFhH98iSpM2nblfJT6eUvhMRTwQ+ExF3pZRuXTpAFegzACeddNIalylJaqvV4k4pfaf6/l3gY8DzOwwzm1LanVLaPTk5ubZVSpIe0rfFHRHHAFtSSvdXP78YuHbolWmoZg8dojk/v+rptBZPAWDqwN2rms709u3M7Nix6nqkXmb3z9I8ONjNtVv3XQ/A1I3X1B5n+vRpZp43vF7lOl0l24GPRUR7+GZK6VNDq0gj0Zyfp7W4SGNiYlXTadywusAGaC0uAhjcGrrmwSat+1o0jq//9IXGm+sHNkDrvvzkiLEGd0
rpHuC5Q6tAY9OYmGBu165xl8HUgQPjLkGbSOP4BnOXzQ1t+lM3Tg1t2m2eDihJhTG4JakwBrckFcaHBa/U7Gz/Rz+38qfRTNX4cGN6Gma8tklSfwb3SjWb0GrlR1h3Mdeo+Wl0K38KbXBLqsPgXo1GA+bmVj+dqanVT0PSpmEftyQVxuCWpMIY3JJUmLH3cY/q3gEw/PsHSGvt0KFZ5uf7bx+Li3mbOHCg9zaxffs0O3a4DZRu7ME9insHwGjuHyCttfn5JouLLSYmem8fN9zQf5tYXMzbgMFdvrEHNwz/3gEwmvsHSMMwMdFg1665VU/nwIGpVU9D64N93JJUGINbkgpjcEtSYdZFH7c2h25P3Wk/SKHbfbl9Oo70SLa4NTLtp+4s15iY6Pokntbi4po8Yk3aSGxxa6QGfeqOT8eRDmeLW5IKUzu4I2JrRByIiI8PsyBJUm+DtLivBu4cViGSpHpqBXdEnAicB7xvuOVIkvqp2+K+Hvh14J+GWIskqYa+wR0R5wPfTSnt7zPcTETsi4h9CwsLa1agJOmR6rS4zwQuiIhvAn8BvCgi/nz5QCml2ZTS7pTS7snJyTUuU5LU1je4U0q/kVI6MaW0E9gLfDal9KqhVyZJ6sgLcLRpHJo9xHxzsKswF1unAHBg6u6Bxts+vZ0dM+O5TL/bwxfa9+PudntXH7JQjoGCO6U0B8wNpRJpyOab8yy2FplodL68vpMbGoMFNsBiK1/WP67g7vbwhV4PY/AhC2Wxxa1NZaIxwa65+pfcr8SBqfFfpj/owxd8yEJZvORdkgpjcEtSYQxuSSqMwS1JhfHDyQ2m21Nmluv31Jk2nz4jrT+2uDeYbk+ZWa7XU2fafPqMtD7Z4t6ABn3KTDc+fUZan2xxS1JhNlyLe3b/LM2Dh1/u27ovXxk2deNUx/GmT59m5nleNSZp/dtwLe7mweZDIb1U4/gGjeM7X/Lbuq/VMewlaT3acC1uyCE9d9lc7eG7tcIlaT3acC1uSdroDG5JKsyG7CpZl2ZnodmlH71V9clPTR3+v+lpmPFDU2m9GeeJELa4R6XZfDigl2s08tdyrVb3sJc0VuM8EcIW9yg1GjA3V3/4Ti1wSevGuE6EsMUtSYWxxS1pRWZnZ2n26cprta4HYGrqmr7Tm56eZmYNPs/p1vcMvfufS7oIzxa3pBVpNpu0un1uU2k0rqHR6B/arVar706gdl1d+p6he/9zaRfh9W1xR8RRwK3Ao6rhP5pS+u1hFyZp/Ws0GswN8rlNF1Nr/HnORr8Ir05Xyf8FXpRSWoyII4DPR8QnU0pfGnJtkqQO+gZ3SikB7Rs8H1F9pWEWJUnqrtaHkxGxFdgPnAK8N6V021Crkta5Q7OHmG92fsjEYiu3cw5MHX4/8+3T29kx4xOFtDq1PpxMKT2YUmoAJwLPj4hnLx8mImYiYl9E7FtYWFjrOqV1Zb45/1BALzfRmGCicfjThRZbi13DXhrEQKcDppS+HxG3AOcAX132v1lgFmD37t12pWjDm2hMsGuu/pOGOrXApZXo2+KOiMmIOLb6+WjgbOCuYRcmSeqsTov7ScAHq37uLcBfppQ+PtyyJG1E3S7aaZ8P3um0wLW6MGcjqXNWyVeA1T95VtKm175op7HspmrLf29rB7rB/Uhe8i5ppAa5aGetL8zZKLzkXZIKY3BLUmEMbkkqjMEtSYUxuCWpMAa3JBXG4JakwhjcklQYg1uSCmNwS1JhDG5JKozBLUmFMbglqTAGtyQVZmS3dZ3dP0vzYIcbqN9X3UD9xqmO402fPs3M87wX7zDNHjpEc/7wZyG2FvMzFacOdH7k1vT27czs8MG30qiNrMXdPNh8KKSXahzfoHF8l5uo39fqGPZaW835+YdCeqnGxASNicMfegs51DuFvaThG+mDFBrHN5i7bK728N1a4Vp7jYkJ5nbVf9BRt1a4pOGzj1uSClPnKe9PjohbIuKOiPhaRFw9isIkSZ3V6Sp5AP
jVlNLtEfEYYH9EfCaldMeQa5MkddC3xZ1S+seU0u3Vz/cDdwInDLswSVJnA/VxR8ROYBdw2zCKkST1Vzu4I2IC+GvgmpTSDzr8fyYi9kXEvoWFhbWsUZK0RK3gjogjyKH94ZTSzZ2GSSnNppR2p5R2T05OrmWNkqQl6pxVEsD7gTtTSu8afkmSpF7qtLjPBC4FXhQRrerrpUOuS5LURd/TAVNKnwdiBLVIkmrwyklJKozBLUmFGelNpqRRODR7iPnm4XcuXGzlOyAemDr8Blnbp7ezY8Zb1KoMtri14cw35x8K6aUmGhNMNA6/Te1ia7Fj0EvrlS1ubUgTjQl2zdW7TW2nFri0ntnilqTCGNySVBiDW5IKY3BLUmEMbkkqjMEtSYUxuCWpMAa3JBXG4JakwhjcklQYg1uSCmNwS1JhDG5JKozBLUmFMbglqTB9gzsiPhAR342Ir46iIElSb3Va3DcC5wy5DklSTX2DO6V0K/C/RlCLJKmGNevjjoiZiNgXEfsWFhbWarKSpGXWLLhTSrMppd0ppd2Tk5NrNVlJ0jKeVSJJhTG4JakwdU4HvAn4b8BpEXFvRLx2+GVJkrrZ1m+AlNIloyhEklSPXSWSVBiDW5IKY3BLUmEMbkkqjMEtSYUxuCWpMAa3JBXG4JakwhjcklQYg1uSCmNwS1JhDG5JKozBLUmFMbglqTAGtyQVxuCWpMIY3JJUGINbkgpjcEtSYWoFd0ScExFfj4i7I+LNwy5KktRdnae8bwXeC5wLPBO4JCKeOezCJEmd1WlxPx+4O6V0T0rpR8BfABcOtyxJUjd1gvsE4NtLfr+3+pskaQwipdR7gIiLgXNSSpdXv18KnJFSumrZcDPATPXracDX175cSdqwnpJSmqwz4LYaw3wHePKS30+s/vYIKaVZYLZWeZKkFavTVfJl4NSIeGpEHAnsBf52uGVJkrrp2+JOKT0QEVcB/xnYCnwgpfS1oVcmSeqobx+3JGl98cpJSSrMhgnuiHj0uGuQpFHYEMEdERcAf1R9eLppRcT2iIhx1yEpi4gnR8QNaz3d4oM7In4M+GXgHcCJEfGEMZf0CBGxpfo+1ECNiBOAf0O+JUGtebVrG4URvP7i1+W2iKhzmm5xIuIpEXHUgOM0ag636mU2jOWeUvo28J6IOHktpzvKDff4QTfeiPjpiJjpM96PgAeA3wbeBfxTjeke2b7fSkTsiYgnDVLXIFJK7XqeEhHbBlkGEXHqAMMfAvYDu4CX1xmvXduA81lpt9SOatyBNo6IOLPOcEtey0WDvp8R8fqIOGfAcY4eZPgBpjsJ3LyaHd2g21pEvLRqAA06n8cNMOwTgV8Das8nIn6evCye0We444C7V9Noq9azV1Y/1234/Hif/wdASunvgT+OiAMrrW+5kQR3RDwHuBb42ToLZUnr6WTgOcCruo2XUrof+Cz5/ilfSSl9v8Y8TgKuj4g/A95IDv+BRMSFEfEvImKiy/9/KiL2Vj+/AbgZ+ADw5jqtw+oUzP8EvD8iLu71miIiUj49aAv5RmBvAi7sNk6H2mrNpxr+dcB7I+I/RMRZdUK8ei1/EhG/B1wZEY/qN0413mOA10bEK3oM89BrqVxNPm21loi4ENgD3DHAOFcB74yIt9cNr4g4MyIui4if7PX+p5QWyNdKnL2SIBpkW1uyE/0N4EUDzudK4HUR8diao3yPvN29oeb0rya/l/cDj+81bErpe9V0vxgRPYftMq8AzgDOqqbX91S7iPgl4PcjYnuPutKSn18CfDsibh20vm4TH+oX8DLgc8AXgU8DF1OdhthjnFOr71uAnyO3pH+h23jAU8gb353Ar9Ss6w+AHwBXVb9v7VfXknH3Av9I7p75AvCEDsOcB/wDcB3QBJ5G3jiuBd4JbOkx/QvIV6E+Dbgc+MNer78a55XAAeC5wPXAu7st6yW1/Q7wvrrzAX4WOAg0qmHfAbyqT10XAbcCxwK3AO+puYyfWX1/NfCWHs
OdB9wD7K1+vw04qeY82vfh+VD1+7Ya6+aV1fp8AjAPfKi9vvYY56eqdfODwI3Av+/1/lfjXEi+QvnYFWxrX6izrQEnV9+vBS6ofu67DQBXVMv5ye3l1mcZn1b9fBLwp8Az+kz/LODvgCOBq4Dpmq//XOAbwOMHWGaPXvLef54qD/qMcwHw9+RL1OvMY8uSnz8BfK5ufV2nudoJ9Cn4idXCaG+ErwPeU62U3cLhJOCbwKVLFuil1Yr4mj4r4i7gf9Rc+KdUoXM78Molf5/oM95J5J3J06rfXwXso3N4nw18Ffhw9fuRwKnVBvzsHiv6/yRf6NQeZy85KK/osdyuBX5tyThvAOaAV3Qap6rtIHBD3fkAvwn86yXDX06+W+SRPZbXLwAvBy6r3sMjq78/vcc4LwC+VdXxdPJO/xd7DH8u8JVqvbquWu8eS97x99yIq9rmgVcs+Vu3ZfxY8o7uSeTPVT5FDu5P0iW8yXfXvAX4yer3ncBbgTfWWEfPoWYQAdvJ29ozlm1rF3V5L59Nvp/Qu4AWOYh/AngW8Ogey+Bo4G+Al5C7PX4J+BPgyg7DHkPeSX2WfB+jZ1XD7umznI8Djqt+fjvwjiXv1QV9lkPt8CbvIH4XOL/6/cXkxszR3Wpbsmx/s/r5iH7zqYZb0/Be8Yg1i3088CXgZ9ovsnrjvkCP1gC55XA7cMmSv32S3Ep+XJ95Pod8WHZFzRpfRt57nle9cdfSpQVRbay3kQ+rLweOqv7+ym4rCzlM/jfwc0v+djPwr3rU9HJyn3W7FbmFHID/rtvrrzbQvwGeteRvXyK3ih/TZZwLge/WnU81j/9ItSOu/vZfgR/v8VpeWC2bv1u2HN/ZaaUn7xBOJIf17eTWzR+SdzLP6zGf84C7yJ9xfJC807oZuImqVdVj3PPJwf+KXsNVwz6KfFRzS/V7VOvbtXTYgZF3kA/y8Ia+DXgp8L6a6+e51es6rGHQYVv7InDmsm3tc+SbxC0ffgv5SOsZ5HBsL7fPkLv0juoxrxny0d3fVuNeSb5nf6fXfxR5h/AR4C3kneSXgRNqvv4Lya3uPdV4PY9uBlxmJ5O34zuAXwFeX603Z9SY/qeojiSqv10KXNRnvKXhfTPVDmklXysaaaAZ5D7kf0vVwiSH418Dv08OvG7h/dJqY3oNOcg+DeyoOc9nU7WIaw5/TjWvfSwJpWXDXERuXT2dHDrvBqaoQp7cCn9ql3HPJx/O/041nYP96iMH0Vd4ZKh2DODq/8cCb6u+9lTz/C/9NpBB5lPN463VPM4mh+qXqVpHXcaZILfq/qBaXj9P/hD1sCMOckv7d8kts5PJwXAZ8IvkYPktYGuPee0hB8rrq9dxDNXhfI114Fxyt8m/rDHsqeRD+dOr5fwRenTPkMPnbqqGCHln9t/JRwZ1uiYuJO/E+nWv/Gq1jNrb2kuAj5O7Zx7VY7yjq/foCeQuw37rzFHAP6MKRvKR2i302EECj6te72+Rt/8XVH/v1zV1BvB/yEcFz+o17EqWWTXs06u63lqtZx+hR7cZ+cjrOvJO63zgkmo7OKXGvLZU319D3lF27WbqOZ2VjDTQDHLr6a3k4H0buSvjxVXhv0fvMHoheQ/4CeC5Q65zEpjs8r9298X7l6y41wF/RA6wvgufHNgPklusJ9esqR0mF9ccfge5dfLZank/Z63ns2Qenya3Gvq+L+SuhSvIH4J+CDi9x7pyOXkH+ppqnJdX/3stNXbG5LC6lxqt5w7jnl3nvSG3ut9Ebp1+jS47+2XjvIz8mcpHgb+kzyF/h/F7duEtWX7XkY9O30buCnk+eQfY8X0iHzEcUwXPzwxY05bqfTlIl66/LuO9BZitOewTyEeSfVvaK1lmy97To8in1Hbtxlu2Tr+uyqab6m5rS8afqrPedB1/pSMOWORjyGH9RuCs6m9bqfHBC7m/7ehR1Nmnjnb3RbvVtI2HW5I9D8WXTOOFwM4B51srTJaNc8wgK+1K5lO9L8
cMOI8jqNEnSO6K+AS5VXvXCt6rgZfZCuZxBPl2x7UO+atxLiC3HNufRQQ1PxAfYB6PJR9Bvol8RHAG+Yhqe5/xfn2Q17JkHXg1PbrKlg3fvjfSXvJN62pt1/Q4WljD5bai94Hctdf1M55hfY3lJlMRsTWl9ODIZ7xKEXEe+fDo7Smlm6rTqR6f8ilcWkPVeb97yKeE7U0pfXO8Fa2NiHgxuQ/5l1NKNw95XmeR19crUj6XuNew21JKD6xgHu1TUWsPT+5e+IeU0lcHnZ8y7w44oIg4l3yq3htTSn817no2uog4IqX0/8Zdx1qKiLOBb6SU7hnyfJ5Ebg1+a5jz0egZ3Cswqg1PkjoxuCWpMBvmxjyStFkY3JJUGINbkgpjcEtSYQxuSSqMwS1JhTG4Jakw/x+LFkjc6NGRjQAAAABJRU5ErkJggg==\n", | |
"text/plain": [ | |
"<Figure size 432x288 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"dend = dendrogram(linkage(cgmlst_profiles, method='complete', metric='euclidean'), leaf_label_func=lambda x: ascii_lowercase[x])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Single linkage, Euclidean distances" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEACAYAAABI5zaHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAGkFJREFUeJzt3Xu0XWV97vHvQy5cDBA0WxNzISqxVhA3moMgp8NoGgyICVXQgHJR6BYBwctpK/QUK9Yi2FqPxSFnCwzAGsUiw0YbqrQQL+0A3YkbwkU6Ug+WUAxbUDBHS07gd/54310XK+sy59pr7b0y83zG2GOvy/vO+c7Leua75nrnWooIzMysWvaa6gaYmVn3OdzNzCrI4W5mVkEOdzOzCnK4m5lVkMPdzKyCHO5mZhXkcDczqyCHu5lZBU0vWlDSNGAEeDgiTqh7bm/gBuDVwGPA2yPiwVbTmzNnTixevLhse83M9mgbN278WUQMtCtXONyBC4H7gQMaPHcW8POIOETSGuBy4O2tJrZ48WJGRkZKzN7MzCT9pEi5QqdlJC0A3gRc3aTIauD6fPsmYLkkFZm2mZl1X9Fz7p8G/hB4psnz84GHACJiJ/AE8LwJt87MzDrSNtwlnQA8GhEbJzozSUOSRiSNjI2NTXRyZmbWRJGe+zHAKkkPAl8G3iDpb+rKPAwsBJA0HTiQ9MHqs0TEcEQsjYilAwNtPw8wM7MOtQ33iLgoIhZExGJgDXBbRLyzrtg64Ix8+6Rcxl8Ub2Y2RcqMlnkWSZcCIxGxDrgG+IKkLcDjpIOAmZlNkVLhHhEbgA359iU1j/8ncHI3G2ZmZp3zFapmZhXU8WmZXhkehrVrJ3eep54KQ0OTO08zs17qu5772rUwOjp58xsdnfyDiZlZr/Vdzx1gcBA2bJiceS1bNjnzMTObTH3Xczczs4lzuJuZVZDD3cysghzuZmYV1JcfqJY1keGT4yNzOv1g1cMozawfVaLnPpHhk4OD6a8THkZpZv2qEj13mNzhk+M8jNLM+lUleu5mZvZsDnczswpyuJuZVZDD3cysgirzgWoZnQydfOQR2Lbt2Y9t357+z55dvg07dqQ/gFmzytcvotNRQEV5GKhZ/9oje+6dDJ3ctu03YT5u1qzOg3nHDnj66c7q9gMPAzXrb3tkzx3KD50cH/bYreGW3Z7eZPMwULP+1rbnLmkfSd+XdJekeyV9tEGZMyWNSRrNf2f3prlmZlZEkZ77U8AbImK7pBnA9yTdEhF31JW7MSLO734TzcysrLbhHhEBjJ9tnpH/opeNMjOziSn0gaqkaZJGgUeBWyPizgbF3irpbkk3SVrY1VaamVkphcI9Ip6OiEFgAXCkpMPqinwdWBwRhwO3Atc3mo6kIUkjkkbGxsYm0m4zM2uh1GiZiPiFpNuBlcA9NY8/VlPsauCKJvWHgWGApUuX+tROl3Rr3H4ZZcb4147p74ZeXRfQaxO57mCi26sf9Pq6i17b3a7rKDJaZkDS7Hx7X2AF8KO6MvNq7q4C7u9mI621bo3bL6PMGP/dfUx/P5jo9rKJ2R2v6yjSc58HXC9pGulg8JWI+IakS4GRiFgHXCBpFbATeBw4s1cNtsametx+v8yrqrwOp9bueF1HkdEydwNHNHj8kprbFwEXdbdpZmbWqT3y6wfMzKrO4W5mVkEOdzOzCnK4m5lV0B77rZCTodX48/Ghi40+he/X8bSdjKeH1staRL+ujyL64bcD6u3YATNnTnw6zewuvyNQZtuU3Yf7YZ91z72HWo0/Hxxs/CLo5/G0nYynh+bLWkQ/r48i+uG3A+rt2LH7jpnv5v5QZtuU2Yf7ZZ91z73HOh1/3q/KLs9E9fv6KKLfrkHYncfMd3t/6MX+3C/7rHvuZmYV5HA3M6sgh7uZWQU53M3MKsjhbmZWQR4tY2Z9pWrXh0wV99zNrK9U7fqQqeKeu5n1napdHz
IV3HM3M6sgh7uZWQU53M3MKqjID2TvI+n7ku6SdK+kjzYos7ekGyVtkXSnpMW9aKyZmRVTpOf+FPCGiHglMAislHRUXZmzgJ9HxCHAXwGXd7eZZmZWRttwj2T8C0Jn5L+oK7YauD7fvglYLklda6WZmZVSaCikpGnARuAQ4LMRcWddkfnAQwARsVPSE8DzgJ91sa1m1kTVf3jCyiv0gWpEPB0Rg8AC4EhJh3UyM0lDkkYkjYyNjXUyCTNroOo/PGHllbqIKSJ+Iel2YCVwT81TDwMLga2SpgMHAo81qD8MDAMsXbq0/tSOmU1AlX94wsorMlpmQNLsfHtfYAXwo7pi64Az8u2TgNsiwuFtZjZFivTc5wHX5/PuewFfiYhvSLoUGImIdcA1wBckbQEeB9b0rMVmZtZW23CPiLuBIxo8fknN7f8ETu5u08zMrFO+QtXMrIIc7mZmFeRwNzOrIH+fu/VUmYtrGil7wU0jvginsSLbpsz63xPWcyfr7JFHYNu2xmVbXW8w0fXpnrv1VJmLaxopc8FNI74Ip7ki26bo+t9T1nMn62zbNti+vXn5RrqxPt1zt57rxcU1RfkinNa6tW32pPXc6a9ETfYvS7nnbmZWQQ53M7MKcribmVWQw93MrIIc7mZmFeRwNzOrIA+FtN3G8MZh1m4uN/h39KefBmDZde8vVe/UV5zK0KsrfkWOVZrD3XYbazevZfSnowzOLX5V0+CHy4U6wOhP01UqDnfbnTncbbcyOHeQDWdu6Ok8ll23rKfTN5sMPuduZlZBDnczswoq8huqCyXdLuk+SfdKurBBmWWSnpA0mv8uaTQtMzObHEXOue8EPhQRmyTtD2yUdGtE3FdX7rsRcUL3m2hmZmW17blHxCMRsSnf/iVwPzC/1w0zM7POlTrnLmkx6cey72zw9NGS7pJ0i6RDu9A2MzPrUOGhkJJmAV8F3h8RT9Y9vQk4OCK2Szoe+BqwpME0hoAhgEWLFnXcaDMza61Qz13SDFKwfzEibq5/PiKejIjt+fZ6YIakOQ3KDUfE0ohYOjAwMMGmm5lZM0VGywi4Brg/Ij7VpMzcXA5JR+bpPtbNhpqZWXFFTsscA5wGbJY0/uuBFwOLACLiKuAk4L2SdgK/BtZERPSgvWZmVkDbcI+I7wFqU+ZK4MpuNcrMzCbGV6iamVWQw93MrIIc7mZmFeRwNzOrIIe7mVkFOdzNzCrI4W5mVkEOdzOzCvJvqFpfGd44zNrNaxs+N/7D1Y1+4/TUV5xa6getJ2s+ZlPFPXfrK2s3r/2vcK03OHeQwbmDuzw++tPRpkE91fMxmyruuVvfGZw7yIYzNxQu36iH3U/zMZsK7rmbmVWQw93MrIIc7mZmFeRwNzOrIIe7mVkFOdzNzCrI4W5mVkFFfiB7oaTbJd0n6V5JFzYoI0mfkbRF0t2SXtWb5pqZWRFFLmLaCXwoIjZJ2h/YKOnWiLivpsxxwJL89xrgc/m/mZlNgbY994h4JCI25du/BO4H5tcVWw3cEMkdwGxJ87reWjMzK6TUOXdJi4EjgDvrnpoPPFRzfyu7HgDMzGySFA53SbOArwLvj4gnO5mZpCFJI5JGxsbGOpmEmZkVUCjcJc0gBfsXI+LmBkUeBhbW3F+QH3uWiBiOiKURsXRgYKCT9pqZWQFFRssIuAa4PyI+1aTYOuD0PGrmKOCJiHiki+00M7MSioyWOQY4DdgsafwLsC8GFgFExFXAeuB4YAvwK+Bd3W+qmZkV1TbcI+J7gNqUCeC8bjXKzMwmxleomplVkMPdzKyCHO5mZhXkcDczqyCHu5lZBTnczcwqyOFuZlZBDnczswoqcoWqmRkMD8PatY2fG/10+r/s/bs+d+qpMDTUu3ZZQw53Mytm7VoYHYXBwV2e2jDYINQhlQeH+xRwuJtZcYODsGFD8fLLlvWqJdaGz7mbmVWQw93MrIIc7mZmFeRwNzOrIIe7mVkFOdzNzCrI4W5mVkFFfiD7Wk
mPSrqnyfPLJD0haTT/XdL9ZpqZWRlFLmK6DrgSuKFFme9GxAldaZGZmU1Y2557RHwHeHwS2mJmZl3SrXPuR0u6S9Itkg7t0jTNzKxD3fhumU3AwRGxXdLxwNeAJY0KShoChgAWLVrUhVmbmVkjE+65R8STEbE9314PzJA0p0nZ4YhYGhFLBwYGJjprMzNrYsLhLmmuJOXbR+ZpPjbR6ZqZWefanpaR9CVgGTBH0lbgI8AMgIi4CjgJeK+kncCvgTURET1rsZmZtdU23CPilDbPX0kaKmlmZn3CV6iamVWQw93MrIIc7mZmFeRwNzOrIIe7mVkFOdzNzCrI4W5mVkEOdzOzCnK4m5lVkMPdzKyCHO5mZhXkcDczqyCHu5lZBTnczcwqyOFuZlZBDnczswpyuJuZVZDD3cysgtqGu6RrJT0q6Z4mz0vSZyRtkXS3pFd1v5lmZlZGkZ77dcDKFs8fByzJf0PA5ybeLDMzm4i24R4R3wEeb1FkNXBDJHcAsyXN61YDzcysvG6cc58PPFRzf2t+bBeShiSNSBoZGxvrwqzNzKyRSf1ANSKGI2JpRCwdGBiYzFmbme1RuhHuDwMLa+4vyI+ZmdkU6Ua4rwNOz6NmjgKeiIhHujBdMzPr0PR2BSR9CVgGzJG0FfgIMAMgIq4C1gPHA1uAXwHv6lVjzcysmLbhHhGntHk+gPO61iIzM5swX6FqZlZBDnczswpyuJuZVZDD3cysghzuZmYV5HA3M6sgh7uZWQU53M3MKsjhbmZWQQ53M7MKcribmVWQw93MrIIc7mZmFeRwNzOrIIe7mVkFOdzNzCrI4W5mVkGFwl3SSkkPSNoi6cMNnj9T0pik0fx3dvebamZmRRX5DdVpwGeBFcBW4AeS1kXEfXVFb4yI83vQRjMzK6lIz/1IYEtE/DgidgBfBlb3tllmZjYRRcJ9PvBQzf2t+bF6b5V0t6SbJC3sSuvMzKwj3fpA9evA4og4HLgVuL5RIUlDkkYkjYyNjXVp1mZmVq9IuD8M1PbEF+TH/ktEPBYRT+W7VwOvbjShiBiOiKURsXRgYKCT9pqZWQFFwv0HwBJJL5I0E1gDrKstIGlezd1VwP3da6KZmZXVdrRMROyUdD7wTWAacG1E3CvpUmAkItYBF0haBewEHgfO7GGbzcysjbbhDhAR64H1dY9dUnP7IuCi7jbNzMw65StUzcwqyOFuZlZBDnczswpyuJuZVZDD3cysghzuZmYV5HA3M6sgh7uZWQU53M3MKsjhbmZWQQ53M7MKcribmVWQw93MrIIc7mZmFeRwNzOrIIe7mVkFOdzNzCrI4W5mVkGFwl3SSkkPSNoi6cMNnt9b0o35+TslLe52Q83MrLi24S5pGvBZ4Djg5cApkl5eV+ws4OcRcQjwV8Dl3W6omZkVV6TnfiSwJSJ+HBE7gC8Dq+vKrAauz7dvApZLUveaaWZmZRQJ9/nAQzX3t+bHGpaJiJ3AE8DzutFAMzMrb/pkzkzSEDCU726X9EDzsp1Mv7flXWfy6uhd5WfS13X6dD13VGeSGta3yz/1dQ4uUrdIuD8MLKy5vyA/1qjMVknTgQOBx+onFBHDwHCRhpmZWeeKnJb5AbBE0oskzQTWAOvqyqwDzsi3TwJui4joXjPNzKyMtj33iNgp6Xzgm8A04NqIuFfSpcBIRKwDrgG+IGkL8DjpAGBmZlNE7mCbmVWPr1A1M6sgh7u1JWm/qW6DmZXjcN/DSHpBmQvMJK0C/jp/mG5tSFoo6fNT3Q4zh3sLeVhnJ/X2yv/76ipdSfOB/0n6Com2bZP0POAC0tdJLJD03JLz66vlh99sm16JiIeAKyW9uJfzmUy9Xmd5HhO65mai9XtF0sGS9ilZZ7Ab8+6rcJd0fA6UyZjXMW2eHwBu7iSgIuKZfPNgSdNL9pSXlJ2npLkF6/wHsBE4AnhLgTo7gJ3AR4BPAc+0Lr6LF+b2FX7hlViW2jr/XdJQkXrj26bD9dzy9N
T49CLiLuBzkn5YZvp5Gr/dQZ19y9Ypo2adnShpXpm6ks6TtLJNmTnAlrKdh5r6xwDvyLebblNJM8e/F0vS8rLLkusdWKLs84E/oMTV+pJOJ+XOy8q2rV5fhHvNi/8i4A0d1F8t6XclzSpYfn/gLElva1YmIsZIQzpXFN3pJL1W0pp8+33AzcC1wIeL9H7ykNO/B66RdFLB3vXhwKXAW9vs2MrXHuxF+gK4PwJWt6oTEb8EbiN9d9DdEfGLooGYl+UqSZ8AzpW0d7eWpab8+Dp9MXA48M5m9Rpsm7Lr+Rzgs5L+t6TXNwr62ms7IuKNwEOSvtNu2jXzeC/wSUkvKFHnfOAKSZeVDJ5jJJ0p6ahm+2btOssuJA2HLjqP1cBy4L5W5SLiZ8D7gH+RdFDR6ed5CHgN8Po8rVbD/xYBn5b0BeCDpM5LmXmdC5wj6YCCVX6W5/m+gtO/kLSOfwmUWg+N9EW4k1YAwO3AU1D8LX3e+a4CVgDfbBfEkl6eQ+u7wJJWZSPiV8C+wGZJsws05yDgMkkfA44GTgauy9P4RKuAVzq3fTjp2zf/Jdc/vU1gvxn4a+BQ0tc6NA3FiAhJ7yDtaBfneby+VZ3sRlK4nyzpA0UuTpN0IvA24DTSC++lEfFUmzrjy3JYu2Wp8ZL8/29I2/MImq+z8W3zp8ArKbee3wqcB/wvYDuwkhbvfMa3c0SsIn3NxrfbLMf49j8HOC8itrUrn+ucS9rHPgG8m/TZSMt9Otd7LXA1afufA/xlk33zIODPawJ+P4p/Tfh84Epge0T8u9q8g42IrwMfAEaKBryk/fL++BngkHygayoitgB3k/bnWyLiMUnTCh7c30O6UHNtRDypFu9GJc2X9Fv5Hc/5wAva9cQlvZ50AejRwOeBF7VrU1sRMaV/pBfzA6S3/aPAncCrSIG1H3ksfpO6i4C3Ay/J998JjADPbVL+aOAnwHuAl5Je3L9foI0rgX8DDipQdgVwD/DFfH8m6SByPXBYkzrzgX8nXSA2XmcN8Je5rbusA+AFwPeAl+X755BeTCc2W2ekXvEf1MzjfcAGUhA3Xc+5/BHAvwLnF1gHZwBvAc4EvgXMzI+/tEn55+dleXndsqxusSyLgAeB0/L96aSDybdIQddona0ANgOfL7qec7mLgf9RU+ds0rejzmyxDvaqub0e+HabdXYOcHG+PaPAOj6AFNDzSJ+L/ANwA3ALsKRFvSNJnaij8v3FwJ8BH2xS/jh+E4gfy9vqAFLIt3w95H1gG/C2msfa7WfHUeC1RjowfRQ4Id8/FvhTUkeqVWYcQsqJTcA7ah6f1aLOvsDXgDeSTrG8l9ShPLdB2eeQvvb8NlIn5dBcdnmr5QfmAHPy7cuAy2vW4ap2+0PDaXZSqZt/eSd5CfCyvFDPkILwVtIpjX2a1LuAdCC4L7/Y9smPv6PRzpFflAtIgb4JWJVf1JuBVxdo53HAj2hy4Kgruxr4OfD2msduBk5u80L4D2BNzXo5A/hz4MAG5Q/Ky3JMvj8j70TfBlY2mceJeSc9tOaxO0gfmO5fYLkOJ73VfE+bcq/L2+C7ddvrChoEV16WO4DfqVuWfyb1Zpq9IN6ct+UpNY/dAvxFo3VWs20eLbqea9bb35EPPvmxfwJ+u92+Xbf9L2+zf/0D8Fs1j50GnNiizt6kdyG35/vK2+dSmhx4SAe4p/nNgWQ6cDxwdYv5vCnv++OvzQ15eb4E7NdmHZxAOji8rVW5sq810qm4s0mv/w+Q3lltAF5TcB5vBu7Ky3ZsXmfTW5QfAn5I+qqVy4BzSb9zsct6BvYhdVBvBP6YdID7ATC/YNtWk3r8y3O9pgfrltPppFKv/khHyE8BzyWd22u4MvKL7QZS7/sK0tvlZeMbh9Sbf1FN+aNJR/lD806xjtSr/P28w/4JMK3gSt9U+6Jts1P/mNSbOJF0EHlJmzpvyi+E2uBpGrrAh3LbD8v33wh8g3QqaO8G5WcDH89/y3Mb/7HoTp
encViB5ZiVt+Nf5O1yOumD3IbvXHKdDwKX1CzLscBXgU+SDtjNAv74vM7eTTpAfgt4YZfX82xS7/bjpHBclV90cwqsr73y/3eTgrFhgJB6wx8jBccJwCl5Hoe0mf4S0impV+R6NwKLCuzHW8gHRdLB+PukXnmz9bycFG7n5fX1HGBhwX3mONJXgv9eif2s0GuNlAF/krfPM3n5pzdbjrq6K/N+MELNgbtJ2X2A/0Y+4JDe8d1Oi4Mb6QsUn5/b91Xg6Px4u3cvrwF+TTqTcWi75Wg6nU4rdvuP1Ot4Tt6hf6dFufFTGNfUrPSPkc7Xrmj04iH12M/OG/HdpLfgb8nPnUWbsKqbVtO3bw3KnkjqJf0d8OKCdcZfCCcVKLsgL/stOXgeIL3tXge8skmdF5J6BbeRgvDwHm3PeXk9/z3pQPyKAsvyZ7lNHyedAjo2b69P0Dp8X0fqta1vttwTWc916+1bpF5rofnU1F9WIEDmkU7PrCf1ittuG1Lv/Y9I73TvbTePmnpvBp4k/bjOVyjw1p/UedhKiV54Td0VRV8DNXUKvdbyOtiHNMy34am/FnUHgIES5ffKmbGZFp2VBvX+GBguWPa5pHfYHfXYx//67rtlJP0h6Xx1/dcK15Z5C+mc7Ici4kv5w40rSEfuSyJ9ENqo3itJPaP9SRt0wsON2pH0OuAnEfFgiTorgH+LiB8XKHsA8FrS2/P1pM8pPg+siBYfzEl6DqkHsb1ouzohaQZARPy/AmX3J73LOgz4YUTcrvQzj/tHxC/a1N0vzSZ+XaJthddz3XwUEf+3aJ2ylC8Yi/TLZ0XKzwDmAs+0et00qLeKdDriixHxyfEPFqNFKHSyznqtZiTYZMxrP9KZgTsi4v4C5RURkT+UfhfpNFvbfVTS3tFmEELbafRhuE+P9GtO7cq9iRTUl9UE/EGRhjC2qvd80lvMC0lvyx/sQrP7Qv7E/TLSOfG7pro9EyVpWkQ8PdXtqDJJx5I+27ogIm6e6vbsDsoeTPJB8wTg/0TEPb1rWd18+y3cy5B0HOnHPz4YEX9bsu6MIr3J3YnSRRkzI+InU90W2330Y2/cJm63Dnfwjmlm1shuH+5mZrarfrlC1czMusjhbmZWQQ53M7MKcribmVWQw93MrIIc7mZmFeRwNzOroP8PJrrXHyT4XUQAAAAASUVORK5CYII=\n", | |
"text/plain": [ | |
"<Figure size 432x288 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"dend = dendrogram(linkage(cgmlst_profiles), leaf_label_func=lambda x: ascii_lowercase[x])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Convert linkage array into ClusterNode object with reference to tree root" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"T = to_tree(Z, rd=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<scipy.cluster.hierarchy.ClusterNode at 0x7fc0923c51d0>" | |
] | |
}, | |
"execution_count": 19, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"T" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Functions for converting a SciPy ClusterNode object into a Newick string" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def _scipy_tree_to_newick_list(node: ClusterNode, newick: List[str], parentdist: float, leaf_names: List[str]) -> List[str]:\n", | |
" \"\"\"Construct Newick tree from SciPy hierarchical clustering ClusterNode\n", | |
"\n", | |
" This is a recursive function to help build a Newick output string from a scipy.cluster.hierarchy.to_tree input with\n", | |
" user specified leaf node names.\n", | |
"\n", | |
" Notes:\n", | |
" This function is meant to be used with `to_newick`\n", | |
"\n", | |
" Args:\n", | |
" node (scipy.cluster.hierarchy.ClusterNode): Root node is output of scipy.cluster.hierarchy.to_tree from hierarchical clustering linkage matrix\n", | |
" parentdist (float): Distance of parent node of `node`\n", | |
" newick (list of string): Newick string output accumulator list which needs to be reversed and concatenated (i.e. `''.join(newick)`) for final output\n", | |
" leaf_names (list of string): Leaf node names\n", | |
"\n", | |
" Returns:\n", | |
" (list of string): Returns `newick` list of Newick output strings\n", | |
" \"\"\"\n", | |
" if node.is_leaf():\n", | |
" return newick + [f'{leaf_names[node.id]}:{parentdist - node.dist}']\n", | |
"\n", | |
" if len(newick) > 0:\n", | |
" newick.append(f'):{parentdist - node.dist}')\n", | |
" else:\n", | |
" newick.append(');')\n", | |
" newick = _scipy_tree_to_newick_list(node.get_left(), newick, node.dist, leaf_names)\n", | |
" newick.append(',')\n", | |
" newick = _scipy_tree_to_newick_list(node.get_right(), newick, node.dist, leaf_names)\n", | |
" newick.append('(')\n", | |
" return newick\n", | |
"\n", | |
"\n", | |
"def to_newick(tree: ClusterNode, leaf_names: List[str]) -> str:\n", | |
" \"\"\"Newick tree output string from SciPy hierarchical clustering tree\n", | |
"\n", | |
" Convert a SciPy ClusterNode tree to a Newick format string.\n", | |
" Use scipy.cluster.hierarchy.to_tree on a hierarchical clustering linkage matrix to create the root ClusterNode for the `tree` input of this function.\n", | |
"\n", | |
" Args:\n", | |
" tree (scipy.cluster.hierarchy.ClusterNode): Output of scipy.cluster.hierarchy.to_tree from hierarchical clustering linkage matrix\n", | |
" leaf_names (list of string): Leaf node names\n", | |
"\n", | |
" Returns:\n", | |
" (string): Newick output string\n", | |
" \"\"\"\n", | |
" newick_list = _scipy_tree_to_newick_list(tree, [], tree.dist, leaf_names)\n", | |
" return ''.join(newick_list[::-1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'((((((((((((((((((((r:0.30000000000000004,k:0.30000000000000004):0.0,l:0.30000000000000004):0.09999999999999998,z:0.4):0.09999999999999998,p:0.5):0.0,n:0.5):0.0,((s:0.4,e:0.4):0.0,o:0.4):0.09999999999999998):0.0,h:0.5):0.0,i:0.5):0.0,t:0.5):0.0,c:0.5):0.0,j:0.5):0.0,q:0.5):0.0,d:0.5):0.0,(w:0.4,f:0.4):0.09999999999999998):0.0,b:0.5):0.0,(y:0.30000000000000004,g:0.30000000000000004):0.19999999999999996):0.0,u:0.5):0.10000000000000009,a:0.6000000000000001):0.0,(x:0.5,m:0.5):0.10000000000000009):0.0,v:0.6000000000000001);'" | |
] | |
}, | |
"execution_count": 21, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"to_newick(T, ascii_lowercase)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Minimum Spanning Tree from cgMLST profiles" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def mst_adjacency_list(dm: np.array, names: List[str]) -> List[Dict[str, Union[str, int, float]]]:\n", | |
" mst = minimum_spanning_tree(squareform(dm))\n", | |
" # scipy.sparse.find returns a tuple with 3 vectors (targets, sources and weights) when given an MST as input\n", | |
" sources, targets, weights = sp.sparse.find(mst)\n", | |
" # return a similar data structure as NetworkX json_graph - list of dicts\n", | |
" links = []\n", | |
" for s,t,w in zip(sources, targets, weights):\n", | |
" # undirected graph so order of source to target doesn't matter, however\n", | |
" # swap source and target ids if source id is larger than target id so\n", | |
" # that it can be rendered similarly as NetworkX MST graphs\n", | |
" if s > t:\n", | |
" s, t = (t, s)\n", | |
" links.append(dict(source=names[s], target=names[t], weight=w))\n", | |
" return links" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[{'source': 'a', 'target': 'p', 'weight': 0.6000000000000001},\n", | |
" {'source': 'b', 'target': 'w', 'weight': 0.5},\n", | |
" {'source': 'e', 'target': 'o', 'weight': 0.4},\n", | |
" {'source': 'e', 'target': 's', 'weight': 0.4},\n", | |
" {'source': 'f', 'target': 'w', 'weight': 0.4},\n", | |
" {'source': 'g', 'target': 'y', 'weight': 0.30000000000000004},\n", | |
" {'source': 'h', 'target': 'o', 'weight': 0.5},\n", | |
" {'source': 'i', 'target': 'w', 'weight': 0.5},\n", | |
" {'source': 'c', 'target': 'j', 'weight': 0.5},\n", | |
" {'source': 'l', 'target': 'r', 'weight': 0.30000000000000004},\n", | |
" {'source': 'k', 'target': 'p', 'weight': 0.5},\n", | |
" {'source': 'd', 'target': 'q', 'weight': 0.5},\n", | |
" {'source': 'f', 'target': 'q', 'weight': 0.5},\n", | |
" {'source': 'j', 'target': 'q', 'weight': 0.5},\n", | |
" {'source': 'k', 'target': 'r', 'weight': 0.30000000000000004},\n", | |
" {'source': 'i', 'target': 's', 'weight': 0.5},\n", | |
" {'source': 'c', 'target': 't', 'weight': 0.5},\n", | |
" {'source': 'n', 'target': 'u', 'weight': 0.5},\n", | |
" {'source': 't', 'target': 'v', 'weight': 0.6000000000000001},\n", | |
" {'source': 'm', 'target': 'x', 'weight': 0.5},\n", | |
" {'source': 'b', 'target': 'y', 'weight': 0.5},\n", | |
" {'source': 'l', 'target': 'z', 'weight': 0.4},\n", | |
" {'source': 'n', 'target': 'z', 'weight': 0.5},\n", | |
" {'source': 't', 'target': 'z', 'weight': 0.5},\n", | |
" {'source': 'x', 'target': 'z', 'weight': 0.6000000000000001}]" | |
] | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dm = sp.spatial.distance.pdist(cgmlst_profiles, metric='hamming')\n", | |
"mst_links = mst_adjacency_list(dm=dm, names=list(ascii_lowercase))\n", | |
"mst_links" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def colours():\n", | |
" while True:\n", | |
" yield '0xFF0000'\n", | |
" yield '0x00FF00'\n", | |
" yield '0x0000FF'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"g_colours = colours()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"jgraph_data = {'nodes': {name: {'color': g_colours.__next__()} for i, name in enumerate(ascii_lowercase)},\n", | |
" 'edges': [dict(source=edge['source'], target=edge['target'], size=edge['weight']*10) for edge in mst_links]}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'nodes': {'a': {'color': '0xFF0000'},\n", | |
" 'b': {'color': '0x00FF00'},\n", | |
" 'c': {'color': '0x0000FF'},\n", | |
" 'd': {'color': '0xFF0000'},\n", | |
" 'e': {'color': '0x00FF00'},\n", | |
" 'f': {'color': '0x0000FF'},\n", | |
" 'g': {'color': '0xFF0000'},\n", | |
" 'h': {'color': '0x00FF00'},\n", | |
" 'i': {'color': '0x0000FF'},\n", | |
" 'j': {'color': '0xFF0000'},\n", | |
" 'k': {'color': '0x00FF00'},\n", | |
" 'l': {'color': '0x0000FF'},\n", | |
" 'm': {'color': '0xFF0000'},\n", | |
" 'n': {'color': '0x00FF00'},\n", | |
" 'o': {'color': '0x0000FF'},\n", | |
" 'p': {'color': '0xFF0000'},\n", | |
" 'q': {'color': '0x00FF00'},\n", | |
" 'r': {'color': '0x0000FF'},\n", | |
" 's': {'color': '0xFF0000'},\n", | |
" 't': {'color': '0x00FF00'},\n", | |
" 'u': {'color': '0x0000FF'},\n", | |
" 'v': {'color': '0xFF0000'},\n", | |
" 'w': {'color': '0x00FF00'},\n", | |
" 'x': {'color': '0x0000FF'},\n", | |
" 'y': {'color': '0xFF0000'},\n", | |
" 'z': {'color': '0x00FF00'}},\n", | |
" 'edges': [{'source': 'a', 'target': 'p', 'size': 6.000000000000001},\n", | |
" {'source': 'b', 'target': 'w', 'size': 5.0},\n", | |
" {'source': 'e', 'target': 'o', 'size': 4.0},\n", | |
" {'source': 'e', 'target': 's', 'size': 4.0},\n", | |
" {'source': 'f', 'target': 'w', 'size': 4.0},\n", | |
" {'source': 'g', 'target': 'y', 'size': 3.0000000000000004},\n", | |
" {'source': 'h', 'target': 'o', 'size': 5.0},\n", | |
" {'source': 'i', 'target': 'w', 'size': 5.0},\n", | |
" {'source': 'c', 'target': 'j', 'size': 5.0},\n", | |
" {'source': 'l', 'target': 'r', 'size': 3.0000000000000004},\n", | |
" {'source': 'k', 'target': 'p', 'size': 5.0},\n", | |
" {'source': 'd', 'target': 'q', 'size': 5.0},\n", | |
" {'source': 'f', 'target': 'q', 'size': 5.0},\n", | |
" {'source': 'j', 'target': 'q', 'size': 5.0},\n", | |
" {'source': 'k', 'target': 'r', 'size': 3.0000000000000004},\n", | |
" {'source': 'i', 'target': 's', 'size': 5.0},\n", | |
" {'source': 'c', 'target': 't', 'size': 5.0},\n", | |
" {'source': 'n', 'target': 'u', 'size': 5.0},\n", | |
" {'source': 't', 'target': 'v', 'size': 6.000000000000001},\n", | |
" {'source': 'm', 'target': 'x', 'size': 5.0},\n", | |
" {'source': 'b', 'target': 'y', 'size': 5.0},\n", | |
" {'source': 'l', 'target': 'z', 'size': 4.0},\n", | |
" {'source': 'n', 'target': 'z', 'size': 5.0},\n", | |
" {'source': 't', 'target': 'z', 'size': 5.0},\n", | |
" {'source': 'x', 'target': 'z', 'size': 6.000000000000001}]}" | |
] | |
}, | |
"execution_count": 36, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"jgraph_data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div id=\"graph-f6fd0a4d-9ba8-4202-98cd-8c402d5cf690\"></div>\n", | |
" <script type=\"text/javascript\">\n", | |
" require.config({baseUrl: '/',\n", | |
" paths: {jgraph: ['nbextensions/jgraph.min', 'https://rawgit.com/patrickfuller/jgraph/master/js/build/jgraph.min']}});\n", | |
" require(['jgraph'], function () {\n", | |
" var $d = $('#graph-f6fd0a4d-9ba8-4202-98cd-8c402d5cf690');\n", | |
" $d.width(600); $d.height(400);\n", | |
" $d.jgraph = jQuery.extend({}, jgraph);\n", | |
" $d.jgraph.create($d, {nodeSize: 2.000000,\n", | |
" edgeSize: 0.250000,\n", | |
" defaultNodeColor: '0x5bc0de',\n", | |
" defaultEdgeColor: '0xaaaaaa',\n", | |
" shader: 'basic',\n", | |
" z: 100,\n", | |
" runOptimization: true,\n", | |
" directed: true,\n", | |
" showSave: false});\n", | |
" $d.jgraph.draw({\n", | |
" \"edges\": [\n", | |
" { \"size\": 6.000000000000001, \"source\": \"a\", \"target\": \"p\" },\n", | |
" { \"size\": 5.0, \"source\": \"b\", \"target\": \"w\" },\n", | |
" { \"size\": 4.0, \"source\": \"e\", \"target\": \"o\" },\n", | |
" { \"size\": 4.0, \"source\": \"e\", \"target\": \"s\" },\n", | |
" { \"size\": 4.0, \"source\": \"f\", \"target\": \"w\" },\n", | |
" { \"size\": 3.0000000000000004, \"source\": \"g\", \"target\": \"y\" },\n", | |
" { \"size\": 5.0, \"source\": \"h\", \"target\": \"o\" },\n", | |
" { \"size\": 5.0, \"source\": \"i\", \"target\": \"w\" },\n", | |
" { \"size\": 5.0, \"source\": \"c\", \"target\": \"j\" },\n", | |
" { \"size\": 3.0000000000000004, \"source\": \"l\", \"target\": \"r\" },\n", | |
" { \"size\": 5.0, \"source\": \"k\", \"target\": \"p\" },\n", | |
" { \"size\": 5.0, \"source\": \"d\", \"target\": \"q\" },\n", | |
" { \"size\": 5.0, \"source\": \"f\", \"target\": \"q\" },\n", | |
" { \"size\": 5.0, \"source\": \"j\", \"target\": \"q\" },\n", | |
" { \"size\": 3.0000000000000004, \"source\": \"k\", \"target\": \"r\" },\n", | |
" { \"size\": 5.0, \"source\": \"i\", \"target\": \"s\" },\n", | |
" { \"size\": 5.0, \"source\": \"c\", \"target\": \"t\" },\n", | |
" { \"size\": 5.0, \"source\": \"n\", \"target\": \"u\" },\n", | |
" { \"size\": 6.000000000000001, \"source\": \"t\", \"target\": \"v\" },\n", | |
" { \"size\": 5.0, \"source\": \"m\", \"target\": \"x\" },\n", | |
" { \"size\": 5.0, \"source\": \"b\", \"target\": \"y\" },\n", | |
" { \"size\": 4.0, \"source\": \"l\", \"target\": \"z\" },\n", | |
" { \"size\": 5.0, \"source\": \"n\", \"target\": \"z\" },\n", | |
" { \"size\": 5.0, \"source\": \"t\", \"target\": \"z\" },\n", | |
" { \"size\": 6.000000000000001, \"source\": \"x\", \"target\": \"z\" }\n", | |
" ],\n", | |
" \"nodes\": {\n", | |
" \"a\": { \"color\": \"0xFF0000\" },\n", | |
" \"b\": { \"color\": \"0x00FF00\" },\n", | |
" \"c\": { \"color\": \"0x0000FF\" },\n", | |
" \"d\": { \"color\": \"0xFF0000\" },\n", | |
" \"e\": { \"color\": \"0x00FF00\" },\n", | |
" \"f\": { \"color\": \"0x0000FF\" },\n", | |
" \"g\": { \"color\": \"0xFF0000\" },\n", | |
" \"h\": { \"color\": \"0x00FF00\" },\n", | |
" \"i\": { \"color\": \"0x0000FF\" },\n", | |
" \"j\": { \"color\": \"0xFF0000\" },\n", | |
" \"k\": { \"color\": \"0x00FF00\" },\n", | |
" \"l\": { \"color\": \"0x0000FF\" },\n", | |
" \"m\": { \"color\": \"0xFF0000\" },\n", | |
" \"n\": { \"color\": \"0x00FF00\" },\n", | |
" \"o\": { \"color\": \"0x0000FF\" },\n", | |
" \"p\": { \"color\": \"0xFF0000\" },\n", | |
" \"q\": { \"color\": \"0x00FF00\" },\n", | |
" \"r\": { \"color\": \"0x0000FF\" },\n", | |
" \"s\": { \"color\": \"0xFF0000\" },\n", | |
" \"t\": { \"color\": \"0x00FF00\" },\n", | |
" \"u\": { \"color\": \"0x0000FF\" },\n", | |
" \"v\": { \"color\": \"0xFF0000\" },\n", | |
" \"w\": { \"color\": \"0x00FF00\" },\n", | |
" \"x\": { \"color\": \"0x0000FF\" },\n", | |
" \"y\": { \"color\": \"0xFF0000\" },\n", | |
" \"z\": { \"color\": \"0x00FF00\" }\n", | |
" }\n", | |
"});\n", | |
"\n", | |
" $d.resizable({\n", | |
" aspectRatio: 600 / 400,\n", | |
" resize: function (evt, ui) {\n", | |
" $d.jgraph.renderer.setSize(ui.size.width,\n", | |
" ui.size.height);\n", | |
" }\n", | |
" });\n", | |
" });\n", | |
" </script>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"jgraph.draw(jgraph_data)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Flat clusters at a number of distance thresholds" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from scipy.cluster.hierarchy import fcluster" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 39, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 4,\n", | |
" 2, 1, 2, 2], dtype=int32)" | |
] | |
}, | |
"execution_count": 39, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"fcluster(Z, t=0.5, criterion='distance')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"distances = np.unique(dm)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 41, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"distances.sort()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 42, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ])" | |
] | |
}, | |
"execution_count": 42, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"distances" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 43, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_clusters = pd.DataFrame([fcluster(Z, t=distance, criterion='distance') for distance in distances]).transpose()\n", | |
"df_clusters.index = list(ascii_lowercase)\n", | |
"df_clusters.columns = distances" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 44, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0.30000000000000004</th>\n", | |
" <th>0.4</th>\n", | |
" <th>0.5</th>\n", | |
" <th>0.6000000000000001</th>\n", | |
" <th>0.7000000000000001</th>\n", | |
" <th>0.8</th>\n", | |
" <th>0.9</th>\n", | |
" <th>1.0</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>a</th>\n", | |
" <td>22</td>\n", | |
" <td>18</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>b</th>\n", | |
" <td>20</td>\n", | |
" <td>16</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>c</th>\n", | |
" <td>16</td>\n", | |
" <td>12</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>d</th>\n", | |
" <td>19</td>\n", | |
" <td>15</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>e</th>\n", | |
" <td>6</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>f</th>\n", | |
" <td>4</td>\n", | |
" <td>4</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>g</th>\n", | |
" <td>3</td>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>h</th>\n", | |
" <td>13</td>\n", | |
" <td>9</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>i</th>\n", | |
" <td>14</td>\n", | |
" <td>10</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>j</th>\n", | |
" <td>17</td>\n", | |
" <td>13</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>k</th>\n", | |
" <td>9</td>\n", | |
" <td>6</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>l</th>\n", | |
" <td>9</td>\n", | |
" <td>6</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>m</th>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>n</th>\n", | |
" <td>12</td>\n", | |
" <td>8</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>o</th>\n", | |
" <td>8</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>p</th>\n", | |
" <td>11</td>\n", | |
" <td>7</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>q</th>\n", | |
" <td>18</td>\n", | |
" <td>14</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>r</th>\n", | |
" <td>9</td>\n", | |
" <td>6</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>s</th>\n", | |
" <td>7</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>t</th>\n", | |
" <td>15</td>\n", | |
" <td>11</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>u</th>\n", | |
" <td>21</td>\n", | |
" <td>17</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>v</th>\n", | |
" <td>23</td>\n", | |
" <td>19</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>w</th>\n", | |
" <td>5</td>\n", | |
" <td>4</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>x</th>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>y</th>\n", | |
" <td>3</td>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>z</th>\n", | |
" <td>10</td>\n", | |
" <td>6</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0\n", | |
"a 22 18 3 1 1 1 1 1\n", | |
"b 20 16 2 1 1 1 1 1\n", | |
"c 16 12 2 1 1 1 1 1\n", | |
"d 19 15 2 1 1 1 1 1\n", | |
"e 6 5 2 1 1 1 1 1\n", | |
"f 4 4 2 1 1 1 1 1\n", | |
"g 3 3 2 1 1 1 1 1\n", | |
"h 13 9 2 1 1 1 1 1\n", | |
"i 14 10 2 1 1 1 1 1\n", | |
"j 17 13 2 1 1 1 1 1\n", | |
"k 9 6 2 1 1 1 1 1\n", | |
"l 9 6 2 1 1 1 1 1\n", | |
"m 1 1 1 1 1 1 1 1\n", | |
"n 12 8 2 1 1 1 1 1\n", | |
"o 8 5 2 1 1 1 1 1\n", | |
"p 11 7 2 1 1 1 1 1\n", | |
"q 18 14 2 1 1 1 1 1\n", | |
"r 9 6 2 1 1 1 1 1\n", | |
"s 7 5 2 1 1 1 1 1\n", | |
"t 15 11 2 1 1 1 1 1\n", | |
"u 21 17 2 1 1 1 1 1\n", | |
"v 23 19 4 1 1 1 1 1\n", | |
"w 5 4 2 1 1 1 1 1\n", | |
"x 2 2 1 1 1 1 1 1\n", | |
"y 3 3 2 1 1 1 1 1\n", | |
"z 10 6 2 1 1 1 1 1" | |
] | |
}, | |
"execution_count": 44, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df_clusters" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment