Created
August 3, 2018 21:46
-
-
Save peterk87/b203f62a71d7f4fb273139b219af5e81 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Generating Newick string output from hierarchical clustering of some cgMLST profiles\n", | |
"\n", | |
"Assuming you have an array of cgMLST profiles in, for example, a Pandas DataFrame read from a CSV or tab-delimited file, you can perform hierarchical clustering and output a Newick string.\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"*Enabling in-line figures*" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Populating the interactive namespace from numpy and matplotlib\n" | |
] | |
} | |
], | |
"source": [ | |
"%pylab inline" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Imports" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/home/CSCScience.ca/pkruczkiewicz/2018-08-03-python-newick-mst/venv/lib/python3.6/importlib/_bootstrap.py:205: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88\n", | |
" return f(*args, **kwds)\n" | |
] | |
} | |
], | |
"source": [ | |
"from typing import Dict, Tuple, List, Union, Optional\n", | |
"\n", | |
"import numpy as np\n", | |
"import scipy as sp\n", | |
"import pandas as pd\n", | |
"\n", | |
"from fastcluster import linkage\n", | |
"from scipy.sparse.csgraph import minimum_spanning_tree\n", | |
"from scipy.cluster.hierarchy import to_tree, ClusterNode, dendrogram\n", | |
"from scipy.spatial.distance import squareform" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Setting up some random cgMLST profile data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Seed NumPy's global RNG so the random profiles generated below are reproducible.\n", | |
"# (Assigning `np.random.seed = 42` would replace the seeding function instead of calling it.)\n", | |
"np.random.seed(42)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from string import ascii_lowercase" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"allele_number_min = 1\n", | |
"allele_number_max = 4\n", | |
"n_markers = 10" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Our profiles are random integers within a specified range (`allele_number_min` to `allele_number_max`, inclusive)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"cgmlst_profiles = np.random.randint(allele_number_min, high=allele_number_max + 1, size=len(ascii_lowercase) * n_markers)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([3, 1, 2, 3, 2, 4, 1, 3, 4, 1, 4, 3, 4, 2, 3, 3, 2, 1, 2, 3, 2, 1,\n", | |
" 2, 4, 1, 2, 4, 2, 2, 4, 1, 4, 2, 4, 2, 3, 2, 2, 3, 1, 3, 3, 2, 4,\n", | |
" 4, 1, 2, 1, 4, 4, 1, 2, 3, 2, 4, 1, 4, 4, 2, 4, 2, 2, 1, 2, 4, 3,\n", | |
" 1, 4, 2, 3, 2, 3, 1, 4, 2, 1, 2, 2, 4, 3, 2, 1, 2, 2, 4, 3, 2, 1,\n", | |
" 1, 1, 3, 1, 2, 4, 3, 2, 4, 1, 3, 2, 2, 2, 4, 1, 1, 4, 2, 4, 3, 2,\n", | |
" 2, 3, 1, 2, 1, 4, 3, 3, 4, 2, 4, 4, 2, 3, 2, 2, 4, 4, 1, 3, 4, 4,\n", | |
" 3, 1, 3, 4, 3, 3, 3, 4, 1, 3, 2, 3, 1, 1, 2, 1, 4, 3, 3, 2, 3, 1,\n", | |
" 1, 4, 1, 2, 3, 1, 1, 4, 2, 4, 3, 1, 4, 4, 3, 4, 2, 3, 4, 2, 1, 4,\n", | |
" 2, 4, 4, 2, 3, 1, 1, 1, 4, 3, 2, 1, 4, 4, 2, 1, 1, 3, 4, 1, 3, 2,\n", | |
" 2, 4, 1, 4, 1, 4, 3, 2, 3, 3, 4, 4, 3, 2, 1, 3, 1, 3, 3, 4, 3, 4,\n", | |
" 1, 4, 3, 2, 4, 3, 2, 1, 2, 4, 4, 1, 1, 2, 2, 2, 1, 4, 1, 1, 4, 2,\n", | |
" 1, 2, 4, 3, 3, 2, 2, 3, 4, 1, 1, 2, 4, 4, 3, 3, 4, 4])" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"cgmlst_profiles" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Reshape into 2D array" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"cgmlst_profiles.shape = (len(ascii_lowercase), n_markers)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[3, 1, 2, 3, 2, 4, 1, 3, 4, 1],\n", | |
" [4, 3, 4, 2, 3, 3, 2, 1, 2, 3],\n", | |
" [2, 1, 2, 4, 1, 2, 4, 2, 2, 4],\n", | |
" [1, 4, 2, 4, 2, 3, 2, 2, 3, 1],\n", | |
" [3, 3, 2, 4, 4, 1, 2, 1, 4, 4],\n", | |
" [1, 2, 3, 2, 4, 1, 4, 4, 2, 4],\n", | |
" [2, 2, 1, 2, 4, 3, 1, 4, 2, 3],\n", | |
" [2, 3, 1, 4, 2, 1, 2, 2, 4, 3],\n", | |
" [2, 1, 2, 2, 4, 3, 2, 1, 1, 1],\n", | |
" [3, 1, 2, 4, 3, 2, 4, 1, 3, 2],\n", | |
" [2, 2, 4, 1, 1, 4, 2, 4, 3, 2],\n", | |
" [2, 3, 1, 2, 1, 4, 3, 3, 4, 2],\n", | |
" [4, 4, 2, 3, 2, 2, 4, 4, 1, 3],\n", | |
" [4, 4, 3, 1, 3, 4, 3, 3, 3, 4],\n", | |
" [1, 3, 2, 3, 1, 1, 2, 1, 4, 3],\n", | |
" [3, 2, 3, 1, 1, 4, 1, 2, 3, 1],\n", | |
" [1, 4, 2, 4, 3, 1, 4, 4, 3, 4],\n", | |
" [2, 3, 4, 2, 1, 4, 2, 4, 4, 2],\n", | |
" [3, 1, 1, 1, 4, 3, 2, 1, 4, 4],\n", | |
" [2, 1, 1, 3, 4, 1, 3, 2, 2, 4],\n", | |
" [1, 4, 1, 4, 3, 2, 3, 3, 4, 4],\n", | |
" [3, 2, 1, 3, 1, 3, 3, 4, 3, 4],\n", | |
" [1, 4, 3, 2, 4, 3, 2, 1, 2, 4],\n", | |
" [4, 1, 1, 2, 2, 2, 1, 4, 1, 1],\n", | |
" [4, 2, 1, 2, 4, 3, 3, 2, 2, 3],\n", | |
" [4, 1, 1, 2, 4, 4, 3, 3, 4, 4]])" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"cgmlst_profiles" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Compute distance matrix\n", | |
"\n", | |
"The Hamming distance is the proportion of alleles that differ between two profiles" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"dm = sp.spatial.distance.pdist(cgmlst_profiles, metric='hamming')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Condensed distance matrix output (the upper triangle of the square distance matrix, flattened row by row)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([1. , 0.8, 0.7, 0.7, 1. , 0.9, 0.8, 0.7, 0.7, 0.9, 0.7, 0.7, 0.8,\n", | |
" 0.7, 0.6, 0.9, 0.8, 0.7, 0.8, 0.8, 0.8, 1. , 0.6, 1. , 0.6, 0.9,\n", | |
" 0.8, 0.7, 0.8, 0.6, 0.7, 0.6, 0.8, 0.8, 0.8, 0.8, 0.8, 0.6, 1. ,\n", | |
" 0.9, 0.6, 0.7, 0.9, 0.9, 0.9, 0.5, 0.8, 0.5, 0.8, 0.7, 0.7, 0.7,\n", | |
" 0.8, 0.7, 0.7, 0.5, 0.8, 0.8, 0.7, 0.9, 0.8, 0.8, 0.6, 0.8, 0.8,\n", | |
" 0.5, 0.7, 0.8, 0.8, 0.8, 0.8, 0.8, 0.7, 0.9, 0.9, 0.6, 0.6, 0.7,\n", | |
" 0.8, 1. , 0.7, 0.8, 0.7, 0.7, 0.5, 0.9, 0.8, 0.9, 0.7, 0.8, 0.6,\n", | |
" 0.8, 0.8, 1. , 0.7, 0.9, 0.5, 0.6, 0.6, 0.9, 0.8, 0.9, 0.9, 0.4,\n", | |
" 0.9, 0.6, 0.7, 0.4, 0.7, 0.7, 0.8, 0.6, 1. , 0.9, 0.7, 0.5, 0.9,\n", | |
" 0.8, 0.9, 0.8, 0.9, 0.8, 0.8, 0.8, 0.8, 0.5, 0.8, 0.8, 0.6, 0.8,\n", | |
" 0.7, 0.4, 0.8, 0.6, 0.7, 0.7, 0.6, 1. , 0.7, 0.7, 0.8, 1. , 0.9,\n", | |
" 0.8, 0.9, 0.7, 0.7, 0.6, 0.9, 0.6, 0.6, 0.6, 0.3, 0.7, 0.8, 0.9,\n", | |
" 0.8, 0.6, 0.8, 1. , 0.5, 0.9, 0.8, 0.6, 0.7, 0.6, 0.7, 0.9, 0.9,\n", | |
" 0.8, 0.7, 0.8, 0.7, 0.8, 0.8, 0.8, 1. , 0.7, 0.9, 0.9, 0.7, 0.5,\n", | |
" 0.7, 1. , 0.9, 0.5, 0.6, 0.7, 0.7, 0.8, 0.9, 0.7, 0.8, 0.8, 0.8,\n", | |
" 0.5, 0.9, 0.7, 0.9, 0.7, 0.8, 0.9, 0.8, 1. , 0.9, 0.6, 0.9, 0.7,\n", | |
" 0.8, 0.5, 0.8, 0.3, 0.8, 0.9, 1. , 0.6, 0.9, 0.9, 0.9, 0.9, 1. ,\n", | |
" 0.7, 0.7, 0.8, 1. , 0.3, 0.8, 0.7, 0.6, 0.7, 0.9, 0.8, 0.7, 0.4,\n", | |
" 0.8, 0.7, 1. , 0.6, 0.9, 1. , 0.9, 0.8, 0.8, 0.9, 0.5, 0.8, 0.9,\n", | |
" 1. , 0.6, 0.6, 0.9, 0.8, 0.8, 0.5, 0.7, 0.7, 0.9, 0.8, 0.5, 0.9,\n", | |
" 0.7, 0.6, 0.7, 0.8, 0.8, 0.8, 0.7, 1. , 0.9, 0.9, 0.9, 0.8, 0.8,\n", | |
" 0.9, 1. , 0.6, 0.9, 0.8, 0.8, 0.9, 0.9, 0.9, 0.8, 0.5, 0.7, 0.7,\n", | |
" 0.9, 1. , 0.9, 0.8, 0.9, 0.9, 0.8, 0.8, 0.8, 0.9, 0.7, 0.6, 0.7,\n", | |
" 0.6, 0.5, 0.8, 0.7, 0.5, 0.7, 0.6, 0.7, 0.8, 0.5, 0.5, 0.7, 0.7,\n", | |
" 0.8, 0.8, 0.5, 0.8, 0.8, 0.6, 0.7, 0.9, 0.6, 0.7, 0.7, 0.6, 0.5])" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dm" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Hierarchical clustering of the distance matrix\n", | |
"\n", | |
"Single linkage by default" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"Z = linkage(dm)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[10. , 17. , 0.3, 2. ],\n", | |
" [11. , 26. , 0.3, 3. ],\n", | |
" [ 6. , 24. , 0.3, 2. ],\n", | |
" [25. , 27. , 0.4, 4. ],\n", | |
" [ 4. , 18. , 0.4, 2. ],\n", | |
" [14. , 30. , 0.4, 3. ],\n", | |
" [ 5. , 22. , 0.4, 2. ],\n", | |
" [15. , 29. , 0.5, 5. ],\n", | |
" [13. , 33. , 0.5, 6. ],\n", | |
" [31. , 34. , 0.5, 9. ],\n", | |
" [ 7. , 35. , 0.5, 10. ],\n", | |
" [ 8. , 36. , 0.5, 11. ],\n", | |
" [19. , 37. , 0.5, 12. ],\n", | |
" [ 2. , 38. , 0.5, 13. ],\n", | |
" [ 9. , 39. , 0.5, 14. ],\n", | |
" [16. , 40. , 0.5, 15. ],\n", | |
" [ 3. , 41. , 0.5, 16. ],\n", | |
" [32. , 42. , 0.5, 18. ],\n", | |
" [ 1. , 43. , 0.5, 19. ],\n", | |
" [28. , 44. , 0.5, 21. ],\n", | |
" [20. , 45. , 0.5, 22. ],\n", | |
" [12. , 23. , 0.5, 2. ],\n", | |
" [ 0. , 46. , 0.6, 23. ],\n", | |
" [47. , 48. , 0.6, 25. ],\n", | |
" [21. , 49. , 0.6, 26. ]])" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Z" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Dendrogram of hierarchical clustering of random cgMLST profiles" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "\n", | |
"text/plain": [ | |
"<Figure size 432x288 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"dend = dendrogram(Z, leaf_label_func=lambda x: ascii_lowercase[x])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Complete linkage with Hamming distances might produce a nicer looking tree with distances that make sense to most people." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "\n", | |
"text/plain": [ | |
"<Figure size 432x288 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"dend = dendrogram(linkage(dm, method='complete'), leaf_label_func=lambda x: ascii_lowercase[x])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Complete linkage with Euclidean distances" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "\n", | |
"text/plain": [ | |
"<Figure size 432x288 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"dend = dendrogram(linkage(cgmlst_profiles, method='complete', metric='euclidean'), leaf_label_func=lambda x: ascii_lowercase[x])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Single linkage, Euclidean distances" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "\n", | |
"text/plain": [ | |
"<Figure size 432x288 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"dend = dendrogram(linkage(cgmlst_profiles), leaf_label_func=lambda x: ascii_lowercase[x])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Convert linkage array into ClusterNode object with reference to tree root" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"T = to_tree(Z, rd=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<scipy.cluster.hierarchy.ClusterNode at 0x7fc0923c51d0>" | |
] | |
}, | |
"execution_count": 19, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"T" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Functions for converting a SciPy ClusterNode object into a Newick string" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def _scipy_tree_to_newick_list(node: ClusterNode, newick: List[str], parentdist: float, leaf_names: List[str]) -> List[str]:\n", | |
"    \"\"\"Recursively collect the Newick tokens of a SciPy hierarchical clustering tree\n", | |
"\n", | |
"    Helper for `to_newick`. Tokens are accumulated in reverse order, so the returned list has to be\n", | |
"    reversed and concatenated (i.e. `''.join(newick[::-1])`) to obtain the final Newick string.\n", | |
"\n", | |
"    Args:\n", | |
"        node (scipy.cluster.hierarchy.ClusterNode): Node to serialise; the initial call receives the root node produced by scipy.cluster.hierarchy.to_tree\n", | |
"        newick (list of string): Accumulator of Newick tokens; an empty list marks the initial (root) call\n", | |
"        parentdist (float): Distance of the parent node of `node`; the branch length written for `node` is `parentdist - node.dist`\n", | |
"        leaf_names (list of string): Leaf node names, indexed by `node.id`\n", | |
"\n", | |
"    Returns:\n", | |
"        (list of string): Newick tokens in reverse order\n", | |
"    \"\"\"\n", | |
"    branch_length = parentdist - node.dist\n", | |
"    if node.is_leaf():\n", | |
"        # a leaf contributes a single `name:length` token on a new list\n", | |
"        return newick + [f'{leaf_names[node.id]}:{branch_length}']\n", | |
"\n", | |
"    # an empty accumulator means this is the root, which terminates the whole tree\n", | |
"    closing = f'):{branch_length}' if len(newick) > 0 else ');'\n", | |
"    newick.append(closing)\n", | |
"    for position, child in enumerate((node.get_left(), node.get_right())):\n", | |
"        if position > 0:\n", | |
"            newick.append(',')\n", | |
"        newick = _scipy_tree_to_newick_list(child, newick, node.dist, leaf_names)\n", | |
"    newick.append('(')\n", | |
"    return newick\n", | |
"\n", | |
"\n", | |
"def to_newick(tree: ClusterNode, leaf_names: List[str]) -> str:\n", | |
"    \"\"\"Serialise a SciPy hierarchical clustering tree as a Newick string\n", | |
"\n", | |
"    Pass the root ClusterNode returned by scipy.cluster.hierarchy.to_tree (called on a hierarchical\n", | |
"    clustering linkage matrix) as `tree`.\n", | |
"\n", | |
"    Args:\n", | |
"        tree (scipy.cluster.hierarchy.ClusterNode): Root node returned by scipy.cluster.hierarchy.to_tree\n", | |
"        leaf_names (list of string): Leaf node names, indexed by leaf node id\n", | |
"\n", | |
"    Returns:\n", | |
"        (string): Newick output string\n", | |
"    \"\"\"\n", | |
"    # the helper accumulates its tokens back to front, so flip them before joining\n", | |
"    reversed_tokens = _scipy_tree_to_newick_list(tree, [], tree.dist, leaf_names)\n", | |
"    return ''.join(reversed(reversed_tokens))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'((((((((((((((((((((r:0.30000000000000004,k:0.30000000000000004):0.0,l:0.30000000000000004):0.09999999999999998,z:0.4):0.09999999999999998,p:0.5):0.0,n:0.5):0.0,((s:0.4,e:0.4):0.0,o:0.4):0.09999999999999998):0.0,h:0.5):0.0,i:0.5):0.0,t:0.5):0.0,c:0.5):0.0,j:0.5):0.0,q:0.5):0.0,d:0.5):0.0,(w:0.4,f:0.4):0.09999999999999998):0.0,b:0.5):0.0,(y:0.30000000000000004,g:0.30000000000000004):0.19999999999999996):0.0,u:0.5):0.10000000000000009,a:0.6000000000000001):0.0,(x:0.5,m:0.5):0.10000000000000009):0.0,v:0.6000000000000001);'" | |
] | |
}, | |
"execution_count": 21, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"to_newick(T, ascii_lowercase)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Minimum Spanning Tree from cgMLST profiles" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def mst_adjacency_list(dm: np.ndarray, names: List[str]) -> List[Dict[str, Union[str, int, float]]]:\n", | |
"    \"\"\"Edge list of the minimum spanning tree of a condensed distance matrix\n", | |
"\n", | |
"    Args:\n", | |
"        dm (numpy.ndarray): Condensed distance matrix, e.g. the output of scipy.spatial.distance.pdist\n", | |
"        names (list of string): Node names; `names[i]` labels row/column `i` of the square form of `dm`\n", | |
"\n", | |
"    Returns:\n", | |
"        (list of dict): One dict per MST edge with the keys `source`, `target` and `weight`\n", | |
"    \"\"\"\n", | |
"    square_dm = squareform(dm)\n", | |
"    # SciPy's csgraph routines treat a zero in a dense matrix as a missing edge, so identical\n", | |
"    # profiles (distance 0) would never be linked directly. Give those pairs the smallest positive\n", | |
"    # float while computing the MST and report their true weight of 0 afterwards.\n", | |
"    zero_pairs = square_dm == 0\n", | |
"    np.fill_diagonal(zero_pairs, False)\n", | |
"    if zero_pairs.any():\n", | |
"        square_dm = square_dm.astype(float)\n", | |
"        square_dm[zero_pairs] = np.finfo(float).tiny\n", | |
"    mst = minimum_spanning_tree(square_dm)\n", | |
"    # scipy.sparse.find returns a tuple with 3 vectors (row indices, column indices and values) of the non-zero entries\n", | |
"    sources, targets, weights = sp.sparse.find(mst)\n", | |
"    weights[zero_pairs[sources, targets]] = 0.0\n", | |
"    # return a similar data structure as NetworkX json_graph - list of dicts\n", | |
"    links = []\n", | |
"    for s, t, w in zip(sources, targets, weights):\n", | |
"        # undirected graph so order of source to target doesn't matter, however\n", | |
"        # swap source and target ids if source id is larger than target id so\n", | |
"        # that it can be rendered similarly as NetworkX MST graphs\n", | |
"        if s > t:\n", | |
"            s, t = t, s\n", | |
"        links.append(dict(source=names[s], target=names[t], weight=w))\n", | |
"    return links" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[{'source': 'a', 'target': 'p', 'weight': 0.6000000000000001},\n", | |
" {'source': 'b', 'target': 'w', 'weight': 0.5},\n", | |
" {'source': 'e', 'target': 'o', 'weight': 0.4},\n", | |
" {'source': 'e', 'target': 's', 'weight': 0.4},\n", | |
" {'source': 'f', 'target': 'w', 'weight': 0.4},\n", | |
" {'source': 'g', 'target': 'y', 'weight': 0.30000000000000004},\n", | |
" {'source': 'h', 'target': 'o', 'weight': 0.5},\n", | |
" {'source': 'i', 'target': 'w', 'weight': 0.5},\n", | |
" {'source': 'c', 'target': 'j', 'weight': 0.5},\n", | |
" {'source': 'l', 'target': 'r', 'weight': 0.30000000000000004},\n", | |
" {'source': 'k', 'target': 'p', 'weight': 0.5},\n", | |
" {'source': 'd', 'target': 'q', 'weight': 0.5},\n", | |
" {'source': 'f', 'target': 'q', 'weight': 0.5},\n", | |
" {'source': 'j', 'target': 'q', 'weight': 0.5},\n", | |
" {'source': 'k', 'target': 'r', 'weight': 0.30000000000000004},\n", | |
" {'source': 'i', 'target': 's', 'weight': 0.5},\n", | |
" {'source': 'c', 'target': 't', 'weight': 0.5},\n", | |
" {'source': 'n', 'target': 'u', 'weight': 0.5},\n", | |
" {'source': 't', 'target': 'v', 'weight': 0.6000000000000001},\n", | |
" {'source': 'm', 'target': 'x', 'weight': 0.5},\n", | |
" {'source': 'b', 'target': 'y', 'weight': 0.5},\n", | |
" {'source': 'l', 'target': 'z', 'weight': 0.4},\n", | |
" {'source': 'n', 'target': 'z', 'weight': 0.5},\n", | |
" {'source': 't', 'target': 'z', 'weight': 0.5},\n", | |
" {'source': 'x', 'target': 'z', 'weight': 0.6000000000000001}]" | |
] | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dm = sp.spatial.distance.pdist(cgmlst_profiles, metric='hamming')\n", | |
"mst_links = mst_adjacency_list(dm=dm, names=list(ascii_lowercase))\n", | |
"mst_links" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def colours():\n", | |
"    \"\"\"Endlessly cycle through the red, green and blue hex colour strings\"\"\"\n", | |
"    palette = ('0xFF0000', '0x00FF00', '0x0000FF')\n", | |
"    while True:\n", | |
"        yield from palette" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"g_colours = colours()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# one colour per node, taken in order from the shared colour generator;\n", | |
"# edge sizes are the MST weights scaled by 10\n", | |
"jgraph_data = {'nodes': {name: {'color': next(g_colours)} for name in ascii_lowercase},\n", | |
"               'edges': [dict(source=edge['source'], target=edge['target'], size=edge['weight'] * 10) for edge in mst_links]}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'nodes': {'a': {'color': '0xFF0000'},\n", | |
" 'b': {'color': '0x00FF00'},\n", | |
" 'c': {'color': '0x0000FF'},\n", | |
" 'd': {'color': '0xFF0000'},\n", | |
" 'e': {'color': '0x00FF00'},\n", | |
" 'f': {'color': '0x0000FF'},\n", | |
" 'g': {'color': '0xFF0000'},\n", | |
" 'h': {'color': '0x00FF00'},\n", | |
" 'i': {'color': '0x0000FF'},\n", | |
" 'j': {'color': '0xFF0000'},\n", | |
" 'k': {'color': '0x00FF00'},\n", | |
" 'l': {'color': '0x0000FF'},\n", | |
" 'm': {'color': '0xFF0000'},\n", | |
" 'n': {'color': '0x00FF00'},\n", | |
" 'o': {'color': '0x0000FF'},\n", | |
" 'p': {'color': '0xFF0000'},\n", | |
" 'q': {'color': '0x00FF00'},\n", | |
" 'r': {'color': '0x0000FF'},\n", | |
" 's': {'color': '0xFF0000'},\n", | |
" 't': {'color': '0x00FF00'},\n", | |
" 'u': {'color': '0x0000FF'},\n", | |
" 'v': {'color': '0xFF0000'},\n", | |
" 'w': {'color': '0x00FF00'},\n", | |
" 'x': {'color': '0x0000FF'},\n", | |
" 'y': {'color': '0xFF0000'},\n", | |
" 'z': {'color': '0x00FF00'}},\n", | |
" 'edges': [{'source': 'a', 'target': 'p', 'size': 6.000000000000001},\n", | |
" {'source': 'b', 'target': 'w', 'size': 5.0},\n", | |
" {'source': 'e', 'target': 'o', 'size': 4.0},\n", | |
" {'source': 'e', 'target': 's', 'size': 4.0},\n", | |
" {'source': 'f', 'target': 'w', 'size': 4.0},\n", | |
" {'source': 'g', 'target': 'y', 'size': 3.0000000000000004},\n", | |
" {'source': 'h', 'target': 'o', 'size': 5.0},\n", | |
" {'source': 'i', 'target': 'w', 'size': 5.0},\n", | |
" {'source': 'c', 'target': 'j', 'size': 5.0},\n", | |
" {'source': 'l', 'target': 'r', 'size': 3.0000000000000004},\n", | |
" {'source': 'k', 'target': 'p', 'size': 5.0},\n", | |
" {'source': 'd', 'target': 'q', 'size': 5.0},\n", | |
" {'source': 'f', 'target': 'q', 'size': 5.0},\n", | |
" {'source': 'j', 'target': 'q', 'size': 5.0},\n", | |
" {'source': 'k', 'target': 'r', 'size': 3.0000000000000004},\n", | |
" {'source': 'i', 'target': 's', 'size': 5.0},\n", | |
" {'source': 'c', 'target': 't', 'size': 5.0},\n", | |
" {'source': 'n', 'target': 'u', 'size': 5.0},\n", | |
" {'source': 't', 'target': 'v', 'size': 6.000000000000001},\n", | |
" {'source': 'm', 'target': 'x', 'size': 5.0},\n", | |
" {'source': 'b', 'target': 'y', 'size': 5.0},\n", | |
" {'source': 'l', 'target': 'z', 'size': 4.0},\n", | |
" {'source': 'n', 'target': 'z', 'size': 5.0},\n", | |
" {'source': 't', 'target': 'z', 'size': 5.0},\n", | |
" {'source': 'x', 'target': 'z', 'size': 6.000000000000001}]}" | |
] | |
}, | |
"execution_count": 36, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"jgraph_data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div id=\"graph-f6fd0a4d-9ba8-4202-98cd-8c402d5cf690\"></div>\n", | |
" <script type=\"text/javascript\">\n", | |
" require.config({baseUrl: '/',\n", | |
" paths: {jgraph: ['nbextensions/jgraph.min', 'https://rawgit.com/patrickfuller/jgraph/master/js/build/jgraph.min']}});\n", | |
" require(['jgraph'], function () {\n", | |
" var $d = $('#graph-f6fd0a4d-9ba8-4202-98cd-8c402d5cf690');\n", | |
" $d.width(600); $d.height(400);\n", | |
" $d.jgraph = jQuery.extend({}, jgraph);\n", | |
" $d.jgraph.create($d, {nodeSize: 2.000000,\n", | |
" edgeSize: 0.250000,\n", | |
" defaultNodeColor: '0x5bc0de',\n", | |
" defaultEdgeColor: '0xaaaaaa',\n", | |
" shader: 'basic',\n", | |
" z: 100,\n", | |
" runOptimization: true,\n", | |
" directed: true,\n", | |
" showSave: false});\n", | |
" $d.jgraph.draw({\n", | |
" \"edges\": [\n", | |
" { \"size\": 6.000000000000001, \"source\": \"a\", \"target\": \"p\" },\n", | |
" { \"size\": 5.0, \"source\": \"b\", \"target\": \"w\" },\n", | |
" { \"size\": 4.0, \"source\": \"e\", \"target\": \"o\" },\n", | |
" { \"size\": 4.0, \"source\": \"e\", \"target\": \"s\" },\n", | |
" { \"size\": 4.0, \"source\": \"f\", \"target\": \"w\" },\n", | |
" { \"size\": 3.0000000000000004, \"source\": \"g\", \"target\": \"y\" },\n", | |
" { \"size\": 5.0, \"source\": \"h\", \"target\": \"o\" },\n", | |
" { \"size\": 5.0, \"source\": \"i\", \"target\": \"w\" },\n", | |
" { \"size\": 5.0, \"source\": \"c\", \"target\": \"j\" },\n", | |
" { \"size\": 3.0000000000000004, \"source\": \"l\", \"target\": \"r\" },\n", | |
" { \"size\": 5.0, \"source\": \"k\", \"target\": \"p\" },\n", | |
" { \"size\": 5.0, \"source\": \"d\", \"target\": \"q\" },\n", | |
" { \"size\": 5.0, \"source\": \"f\", \"target\": \"q\" },\n", | |
" { \"size\": 5.0, \"source\": \"j\", \"target\": \"q\" },\n", | |
" { \"size\": 3.0000000000000004, \"source\": \"k\", \"target\": \"r\" },\n", | |
" { \"size\": 5.0, \"source\": \"i\", \"target\": \"s\" },\n", | |
" { \"size\": 5.0, \"source\": \"c\", \"target\": \"t\" },\n", | |
" { \"size\": 5.0, \"source\": \"n\", \"target\": \"u\" },\n", | |
" { \"size\": 6.000000000000001, \"source\": \"t\", \"target\": \"v\" },\n", | |
" { \"size\": 5.0, \"source\": \"m\", \"target\": \"x\" },\n", | |
" { \"size\": 5.0, \"source\": \"b\", \"target\": \"y\" },\n", | |
" { \"size\": 4.0, \"source\": \"l\", \"target\": \"z\" },\n", | |
" { \"size\": 5.0, \"source\": \"n\", \"target\": \"z\" },\n", | |
" { \"size\": 5.0, \"source\": \"t\", \"target\": \"z\" },\n", | |
" { \"size\": 6.000000000000001, \"source\": \"x\", \"target\": \"z\" }\n", | |
" ],\n", | |
" \"nodes\": {\n", | |
" \"a\": { \"color\": \"0xFF0000\" },\n", | |
" \"b\": { \"color\": \"0x00FF00\" },\n", | |
" \"c\": { \"color\": \"0x0000FF\" },\n", | |
" \"d\": { \"color\": \"0xFF0000\" },\n", | |
" \"e\": { \"color\": \"0x00FF00\" },\n", | |
" \"f\": { \"color\": \"0x0000FF\" },\n", | |
" \"g\": { \"color\": \"0xFF0000\" },\n", | |
" \"h\": { \"color\": \"0x00FF00\" },\n", | |
" \"i\": { \"color\": \"0x0000FF\" },\n", | |
" \"j\": { \"color\": \"0xFF0000\" },\n", | |
" \"k\": { \"color\": \"0x00FF00\" },\n", | |
" \"l\": { \"color\": \"0x0000FF\" },\n", | |
" \"m\": { \"color\": \"0xFF0000\" },\n", | |
" \"n\": { \"color\": \"0x00FF00\" },\n", | |
" \"o\": { \"color\": \"0x0000FF\" },\n", | |
" \"p\": { \"color\": \"0xFF0000\" },\n", | |
" \"q\": { \"color\": \"0x00FF00\" },\n", | |
" \"r\": { \"color\": \"0x0000FF\" },\n", | |
" \"s\": { \"color\": \"0xFF0000\" },\n", | |
" \"t\": { \"color\": \"0x00FF00\" },\n", | |
" \"u\": { \"color\": \"0x0000FF\" },\n", | |
" \"v\": { \"color\": \"0xFF0000\" },\n", | |
" \"w\": { \"color\": \"0x00FF00\" },\n", | |
" \"x\": { \"color\": \"0x0000FF\" },\n", | |
" \"y\": { \"color\": \"0xFF0000\" },\n", | |
" \"z\": { \"color\": \"0x00FF00\" }\n", | |
" }\n", | |
"});\n", | |
"\n", | |
" $d.resizable({\n", | |
" aspectRatio: 600 / 400,\n", | |
" resize: function (evt, ui) {\n", | |
" $d.jgraph.renderer.setSize(ui.size.width,\n", | |
" ui.size.height);\n", | |
" }\n", | |
" });\n", | |
" });\n", | |
" </script>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"# jgraph is not imported by any earlier cell, so import it here to keep Restart & Run All working\n", | |
"import jgraph\n", | |
"\n", | |
"jgraph.draw(jgraph_data)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Flat clusters at a number of distance thresholds" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from scipy.cluster.hierarchy import fcluster" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 39, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 4,\n", | |
" 2, 1, 2, 2], dtype=int32)" | |
] | |
}, | |
"execution_count": 39, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"fcluster(Z, t=0.5, criterion='distance')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"distances = np.unique(dm)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 41, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"distances.sort()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 42, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ])" | |
] | |
}, | |
"execution_count": 42, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"distances" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 43, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_clusters = pd.DataFrame([fcluster(Z, t=distance, criterion='distance') for distance in distances]).transpose()\n", | |
"df_clusters.index = list(ascii_lowercase)\n", | |
"df_clusters.columns = distances" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 44, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0.30000000000000004</th>\n", | |
" <th>0.4</th>\n", | |
" <th>0.5</th>\n", | |
" <th>0.6000000000000001</th>\n", | |
" <th>0.7000000000000001</th>\n", | |
" <th>0.8</th>\n", | |
" <th>0.9</th>\n", | |
" <th>1.0</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>a</th>\n", | |
" <td>22</td>\n", | |
" <td>18</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>b</th>\n", | |
" <td>20</td>\n", | |
" <td>16</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>c</th>\n", | |
" <td>16</td>\n", | |
" <td>12</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>d</th>\n", | |
" <td>19</td>\n", | |
" <td>15</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>e</th>\n", | |
" <td>6</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>f</th>\n", | |
" <td>4</td>\n", | |
" <td>4</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>g</th>\n", | |
" <td>3</td>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>h</th>\n", | |
" <td>13</td>\n", | |
" <td>9</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>i</th>\n", | |
" <td>14</td>\n", | |
" <td>10</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>j</th>\n", | |
" <td>17</td>\n", | |
" <td>13</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>k</th>\n", | |
" <td>9</td>\n", | |
" <td>6</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>l</th>\n", | |
" <td>9</td>\n", | |
" <td>6</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>m</th>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>n</th>\n", | |
" <td>12</td>\n", | |
" <td>8</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>o</th>\n", | |
" <td>8</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>p</th>\n", | |
" <td>11</td>\n", | |
" <td>7</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>q</th>\n", | |
" <td>18</td>\n", | |
" <td>14</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>r</th>\n", | |
" <td>9</td>\n", | |
" <td>6</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>s</th>\n", | |
" <td>7</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>t</th>\n", | |
" <td>15</td>\n", | |
" <td>11</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>u</th>\n", | |
" <td>21</td>\n", | |
" <td>17</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>v</th>\n", | |
" <td>23</td>\n", | |
" <td>19</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>w</th>\n", | |
" <td>5</td>\n", | |
" <td>4</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>x</th>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>y</th>\n", | |
" <td>3</td>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>z</th>\n", | |
" <td>10</td>\n", | |
" <td>6</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0\n", | |
"a 22 18 3 1 1 1 1 1\n", | |
"b 20 16 2 1 1 1 1 1\n", | |
"c 16 12 2 1 1 1 1 1\n", | |
"d 19 15 2 1 1 1 1 1\n", | |
"e 6 5 2 1 1 1 1 1\n", | |
"f 4 4 2 1 1 1 1 1\n", | |
"g 3 3 2 1 1 1 1 1\n", | |
"h 13 9 2 1 1 1 1 1\n", | |
"i 14 10 2 1 1 1 1 1\n", | |
"j 17 13 2 1 1 1 1 1\n", | |
"k 9 6 2 1 1 1 1 1\n", | |
"l 9 6 2 1 1 1 1 1\n", | |
"m 1 1 1 1 1 1 1 1\n", | |
"n 12 8 2 1 1 1 1 1\n", | |
"o 8 5 2 1 1 1 1 1\n", | |
"p 11 7 2 1 1 1 1 1\n", | |
"q 18 14 2 1 1 1 1 1\n", | |
"r 9 6 2 1 1 1 1 1\n", | |
"s 7 5 2 1 1 1 1 1\n", | |
"t 15 11 2 1 1 1 1 1\n", | |
"u 21 17 2 1 1 1 1 1\n", | |
"v 23 19 4 1 1 1 1 1\n", | |
"w 5 4 2 1 1 1 1 1\n", | |
"x 2 2 1 1 1 1 1 1\n", | |
"y 3 3 2 1 1 1 1 1\n", | |
"z 10 6 2 1 1 1 1 1" | |
] | |
}, | |
"execution_count": 44, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df_clusters" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment