Skip to content

Instantly share code, notes, and snippets.

@ravila4
Created March 1, 2018 16:43
Show Gist options
  • Save ravila4/272cf1dccf322f94487d5c743af5e317 to your computer and use it in GitHub Desktop.
Save ravila4/272cf1dccf322f94487d5c743af5e317 to your computer and use it in GitHub Desktop.
Featurizer experiments using deepchem
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Featurizing 0 / 1\n",
"TIMING: Writing ligand took 0.001 s\n",
"TIMING: Writing protein took 0.008 s\n",
"TIMING: Loading protein coordinates took 15.259 s\n",
"TIMING: Loading ligand coordinates took 0.056 s\n",
"TIMING: Centroid processing took 0.000 s\n"
]
}
],
"source": [
"import deepchem as dc\n",
"from deepchem.feat.rdkit_grid_featurizer import RdkitGridFeaturizer\n",
"\n",
"# Build a single featurizer instance; later cells reuse it under this name.\n",
"GridFeaturizer = RdkitGridFeaturizer(\n",
"    voxel_width=16.0,\n",
"    feature_types=[\"all_combined\"],\n",
"    ecfp_power=5,\n",
"    splif_power=5,\n",
"    parallel=True,\n",
"    flatten=True\n",
")\n",
"\n",
"# Featurize one ligand file against one protein file.\n",
"features = GridFeaturizer.featurize_complexes(\n",
"    mol_files=[\"suv.sdf\"],\n",
"    protein_pdbs=[\"4s0v.pdb\"]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 0. 1. 1.\n",
" 0. 1. 1. 1. 1. 1. 1. 0. 0. 1. 1. 0. 0. 1. 1. 0. 1. 1.\n",
" 0. 0. 1. 1. 1. 1. 1. 1. 1. 0. 0. 1. 0. 0. 0. 1. 1. 0.\n",
" 1. 1. 0. 1. 1. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 1. 1. 0.\n",
" 1. 1. 0. 0. 1. 1. 1. 1. 1. 0. 1. 0. 1. 1. 0. 1. 1. 1.\n",
" 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
" 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
" 1. 0. 5. 1. 1. 2. 5. 2. 4. 3. 14. 4. 1. 9. 5. 3. 1. 1.\n",
" 6. 1. 0. 4. 7. 2. 4. 7. 18. 2. 1. 15. 4. 1. 1. 27. 0. 0.\n",
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
" 2. 2. 4. 0. 6. 2. 0. 4. 4. 0. 0. 8. 8. 4. 6. 2. 0. 2.\n",
" 0. 2. 2. 0. 2. 2. 2. 4. 8. 0. 4. 2. 16. 10. 10. 22. 22. 48.\n",
" 16. 28. 18. 54. 23. 34. 24. 42. 40. 29. 35. 26. 8. 32. 16. 24. 33. 18.\n",
" 32. 16. 20. 24. 17. 28. 31. 28.]]\n",
"Num features: 296\n"
]
}
],
"source": [
"# Show the featurizer output, then the length of its first row.\n",
"print(features)\n",
"num_features = len(features[0])\n",
"print(\"Num features:\", num_features)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"import glob\n",
"import os\n",
"\n",
"# glob.glob returns matches in arbitrary order. The two lists are later\n",
"# paired by position (ligands[i] with receptors[i]), so sort both to make\n",
"# the pairing deterministic.\n",
"# NOTE(review): assumes each complex directory holds exactly one\n",
"# *protein.pdb and one *ligand.sdf file -- the check below enforces it.\n",
"receptors = sorted(glob.glob(\"./pdbbind_2016_refined/*/*protein.pdb\"))\n",
"ligands = sorted(glob.glob(\"./pdbbind_2016_refined/*/*ligand.sdf\"))\n",
"\n",
"# Fail early instead of silently featurizing mismatched ligand/protein pairs.\n",
"receptor_dirs = [os.path.dirname(path) for path in receptors]\n",
"ligand_dirs = [os.path.dirname(path) for path in ligands]\n",
"assert receptor_dirs == ligand_dirs, \"receptor and ligand files are not aligned by complex directory\""
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Featurizing 0 / 5\n",
"TIMING: Writing ligand took 0.000 s\n",
"TIMING: Writing protein took 0.014 s\n",
"TIMING: Loading protein coordinates took 348.048 s\n",
"TIMING: Loading ligand coordinates took 0.067 s\n",
"TIMING: Centroid processing took 0.001 s\n",
"TIMING: Writing ligand took 0.000 s\n",
"TIMING: Writing protein took 0.005 s\n",
"TIMING: Loading protein coordinates took 15.409 s\n",
"TIMING: Loading ligand coordinates took 0.110 s\n",
"TIMING: Centroid processing took 0.000 s\n",
"TIMING: Writing ligand took 0.000 s\n",
"TIMING: Writing protein took 0.002 s\n",
"TIMING: Loading protein coordinates took 9.256 s\n",
"TIMING: Loading ligand coordinates took 0.082 s\n",
"TIMING: Centroid processing took 0.000 s\n",
"TIMING: Writing ligand took 0.000 s\n",
"TIMING: Writing protein took 0.002 s\n",
"TIMING: Loading protein coordinates took 6.694 s\n",
"TIMING: Loading ligand coordinates took 0.077 s\n",
"TIMING: Centroid processing took 0.000 s\n",
"TIMING: Writing ligand took 0.000 s\n",
"TIMING: Writing protein took 0.003 s\n",
"TIMING: Loading protein coordinates took 6.006 s\n",
"TIMING: Loading ligand coordinates took 0.060 s\n",
"TIMING: Centroid processing took 0.000 s\n"
]
}
],
"source": [
"# Featurize the first five complexes; ligand i is paired with receptor i.\n",
"features = GridFeaturizer.featurize_complexes(\n",
"    mol_files=ligands[:5],\n",
"    protein_pdbs=receptors[:5]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ 1. 1. 0. 0. 1. 0. 1. 1. 1. 1. 0. 1. 1. 1. 0. 0. 1. 1.\n",
" 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 0. 0. 0.\n",
" 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.\n",
" 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1. 1.\n",
" 1. 0. 1. 1. 1. 1. 0. 0. 1. 1. 0. 0. 0. 1. 1. 1. 1. 0.\n",
" 1. 1. 0. 1. 1. 1. 0. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1.\n",
" 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
" 1. 0. 2. 6. 0. 0. 3. 0. 1. 11. 7. 1. 0. 1. 1. 3. 0. 1.\n",
" 2. 1. 1. 0. 2. 4. 2. 6. 5. 2. 4. 6. 0. 0. 4. 15. 0. 0.\n",
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 2. 2. 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 2. 0. 0. 0. 0. 0. 2. 0. 0. 0. 0. 0. 4. 0.\n",
" 0. 0. 0. 8. 0. 2. 14. 6. 0. 8. 2. 4. 4. 0. 0. 4. 2. 0.\n",
" 0. 0. 2. 8. 6. 2. 0. 2. 8. 0. 2. 2. 26. 0. 10. 12. 0. 10.\n",
" 18. 4. 34. 8. 12. 10. 26. 12. 6. 2. 22. 2. 34. 4. 16. 8. 4. 20.\n",
" 20. 10. 4. 8. 20. 4. 38. 34.]\n",
"[ 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
" 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1.\n",
" 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 0. 0.\n",
" 1. 0. 1. 1. 1. 1. 0. 0. 1. 0. 0. 0.\n",
" 1. 1. 1. 0. 1. 0. 0. 1. 0. 1. 0. 1.\n",
" 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 1.\n",
" 1. 1. 0. 0. 1. 1. 0. 1. 1. 1. 1. 0.\n",
" 1. 0. 0. 0. 1. 1. 0. 1. 1. 0. 1. 1.\n",
" 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
" 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
" 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
" 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
" 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
" 1. 1. 1. 1. 1. 1. 1. 0. 6. 9. 2. 5.\n",
" 7. 4. 2. 5. 43. 6. 3. 10. 5. 2. 4. 0.\n",
" 20. 11. 5. 4. 11. 19. 4. 20. 29. 6. 1. 28.\n",
" 6. 0. 5. 19. 0. 0. 0. 0. 0. 0. 0. 0.\n",
" 10. 2. 2. 0. 0. 2. 2. 0. 4. 2. 2. 8.\n",
" 0. 4. 0. 0. 0. 2. 2. 0. 4. 6. 0. 13.\n",
" 2. 0. 0. 10. 2. 18. 4. 12. 42. 14. 10. 8.\n",
" 4. 17. 12. 8. 17. 14. 12. 18. 12. 14. 10. 16.\n",
" 22. 2. 10. 12. 6. 10. 17. 49. 26. 10. 26. 20.\n",
" 38. 69. 36. 66. -31. 63. 67. 54. 48. 73. 72. 61.\n",
" 87. 87. 87. 51. 80. 103. 101. 82. 79. 40. 66. 69.\n",
" 58. 75. 75. -90. 99. 60. 126. -122.]\n",
"[ 1. 0. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 0. 0. 0. 1. 1.\n",
" 1. 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 0. 1. 1. 0. 0. 0. 1.\n",
" 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1.\n",
" 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 1.\n",
" 0. 1. 0. 1. 0. 1. 0. 0. 0. 1. 1. 1. 1. 0. 1. 1. 1. 1.\n",
" 0. 1. 1. 0. 1. 1. 1. 0. 1. 0. 1. 0. 0. 1. 0. 1. 1. 1.\n",
" 0. 1. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
" 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
" 1. 0. 1. 0. 3. 8. 1. 1. 0. 2. 25. 3. 0. 13. 4. 0. 0. 0.\n",
" 25. 5. 2. 0. 12. 11. 4. 11. 7. 1. 0. 3. 6. 0. 2. 3. 0. 0.\n",
" 0. 0. 4. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 4.\n",
" 0. 6. 0. 0. 0. 0. 6. 0. 4. 0. 2. 0. 2. 0. 0. 0. 12. 14.\n",
" 16. 4. 0. 6. 2. 8. 2. 0. 4. 6. 0. 4. 4. 2. 0. 16. 0. 4.\n",
" 0. 0. 8. 0. 12. 10. 2. 0. 16. 0. 0. 4. 34. 36. 46. 14. 8. 30.\n",
" 44. 58. 32. 16. 16. 4. 12. 12. 32. 36. 22. 68. 12. 40. 22. 10. 36. 8.\n",
" 36. 62. 38. 8. 46. 2. 18. 20.]\n",
"[ 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1.\n",
" 1. 1. 1. 1. 1. 1. 1. 0. 0. 1. 1. 0. 1. 1. 0. 0. 1. 0.\n",
" 0. 1. 0. 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 0.\n",
" 1. 0. 1. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0.\n",
" 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 1. 0.\n",
" 1. 0. 0. 1. 0. 1. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
" 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1.\n",
" 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
" 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
" 1. 0. 3. 0. 5. 5. 3. 3. 3. 2. 28. 6. 5. 17. 4. 6. 1. 8.\n",
" 27. 8. 1. 10. 15. 8. 3. 7. 28. 0. 0. 14. 3. 2. 1. 15. 0. 0.\n",
" 0. 0. 0. 0. 2. 2. 0. 2. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
" 0. 2. 2. 0. 0. 4. 0. 4. 0. 0. 4. 0. 6. 0. 0. 0. 2. 6.\n",
" 8. 2. 4. 20. 4. 3. 6. 16. 6. 10. 8. 10. 8. 16. 6. 14. 0. 10.\n",
" 10. 8. 11. 6. 12. 10. 12. 10. 8. 2. 6. 6. 55. 56. 42. 47. 51. 64.\n",
" 62. 68. 20. 60. 57. 56. 39. 41. 55. 61. 48. 88. 32. 80. 51. 33. 56. 42.\n",
" 73. 59. 81. 83. 66. 40. 51. 60.]\n",
"[ 1. 1. 1. 1. 0. 0. 0. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1.\n",
" 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 0. 0. 1. 1. 1. 0. 0.\n",
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
" 1. 1. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 1. 0. 0. 0. 0. 1. 1. 0.\n",
" 1. 1. 0. 1. 1. 0. 0. 1. 0. 0. 0. 1. 1. 0. 0. 1. 1. 1.\n",
" 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1.\n",
" 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
" 1. 0. 5. 4. 1. 2. 0. 0. 0. 4. 6. 2. 3. 0. 1. 2. 4. 0.\n",
" 8. 5. 4. 6. 3. 0. 6. 14. 16. 0. 3. 9. 1. 0. 2. 21. 0. 0.\n",
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 2. 0. 0.\n",
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 2. 0. 4. 4. 8.\n",
" 0. 0. 0. 0. 4. 8. 0. 12. 8. 0. 4. 22. 0. 0. 4. 0. 0. 0.\n",
" 4. 8. 0. 0. 2. 12. 2. 4. 6. 12. 6. 2. 36. 60. 20. 12. 8. 0.\n",
" 24. 54. 8. 40. 46. 24. 30. 90. 8. 14. 18. 6. 12. 16. 18. 58. 6. 26.\n",
" 20. 52. 18. 26. 38. 88. 38. 28.]\n"
]
}
],
"source": [
"# Print every row of the featurizer output, one per iteration.\n",
"for complex_features in features:\n",
"    print(complex_features)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment