Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save benbowen/81ad833b54586cc50ee5 to your computer and use it in GitHub Desktop.
Save benbowen/81ad833b54586cc50ee5 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# http://metacyc.org/META/class-instances?object=Compounds\n",
"# Smart Table\n",
"# Add the following columns\n",
"# export with weblinks"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import sys,os\n",
"sys.path.insert(0,'/global/project/projectdirs/metatlas/anaconda/lib/python2.7/site-packages' )\n",
"from metatlas import metatlas_objects as metob\n",
"import csv\n",
"\n",
"\n",
"curr_ld_lib_path = ''\n",
"\n",
"os.environ['LD_LIBRARY_PATH'] = curr_ld_lib_path + ':/project/projectdirs/openmsi/jupyterhub_libs/boost_1_55_0/lib' + ':/project/projectdirs/openmsi/jupyterhub_libs/lib'\n",
"import sys\n",
"# sys.path.remove('/anaconda/lib/python2.7/site-packages')\n",
"sys.path.append('/global/project/projectdirs/openmsi/jupyterhub_libs/anaconda/lib/python2.7/site-packages')\n",
"sys.path.insert(0,'/project/projectdirs/openmsi/projects/meta-iq/pactolus/pactolus' )\n",
"\n",
"from rdkit import Chem\n",
"# from rdkit.Chem.rdMolDescriptors import ExactMolWt\n",
"from rdkit.Chem import Descriptors\n",
"from rdkit.Chem import rdMolDescriptors\n",
"from rdkit.Chem import AllChem\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['KEGG', 'Compounds', 'Monoisotopic-Molecular-Weight', 'Wikipedia', 'PubChem', 'Names', 'Common-Name', 'InChI', 'Object ID', 'LIPID MAPS']\n"
]
}
],
"source": [
"\"\"\"\n",
"Create a list of dictionaries. Each item in the list will be a dict containin the metacyc information.\n",
"\"\"\"\n",
"datafile = 'Selected_rows_from_All_instances_of_Compounds_in_MetaCyc.txt'\n",
"with open(datafile, 'rUb') as f:\n",
" metacyc_compounds = list(csv.DictReader(f, dialect='excel-tab'))\n",
"print metacyc_compounds[0].keys()"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"\"\"\" contribution from Hans de Winter \"\"\"\n",
"def _InitialiseNeutralisationReactions():\n",
" patts= (\n",
" # Imidazoles\n",
" ('[n+;H]','n'),\n",
" # Amines\n",
" ('[N+;!H0]','N'),\n",
" # Carboxylic acids and alcohols\n",
" ('[$([O-]);!$([O-][#7])]','O'),\n",
" # Thiols\n",
" ('[S-;X1]','S'),\n",
" # Sulfonamides\n",
" ('[$([N-;X2]S(=O)=O)]','N'),\n",
" # Enamines\n",
" ('[$([N-;X2][C,N]=C)]','N'),\n",
" # Tetrazoles\n",
" ('[n-]','[nH]'),\n",
" # Sulfoxides\n",
" ('[$([S-]=O)]','S'),\n",
" # Amides\n",
" ('[$([N-]C=O)]','N'),\n",
" )\n",
" return [(Chem.MolFromSmarts(x),Chem.MolFromSmiles(y,False)) for x,y in patts]\n",
"\n",
"_reactions=None\n",
"def NeutraliseCharges(mol, reactions=None):\n",
" global _reactions\n",
" if reactions is None:\n",
" if _reactions is None:\n",
" _reactions=_InitialiseNeutralisationReactions()\n",
" reactions=_reactions\n",
"# mol = Chem.MolFromSmiles(smiles)\n",
" replaced = False\n",
" for i,(reactant, product) in enumerate(reactions):\n",
" while mol.HasSubstructMatch(reactant):\n",
" replaced = True\n",
" rms = AllChem.ReplaceSubstructs(mol, reactant, product)\n",
" rms_smiles = Chem.MolToSmiles(rms[0])\n",
" mol = Chem.MolFromSmiles(rms_smiles)\n",
" if replaced:\n",
" return (mol, True) #Chem.MolToSmiles(mol,True)\n",
" else:\n",
" return (mol, False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [],
"source": [
"for myCompound in metacyc_compounds:\n",
" myMol = Chem.MolFromInchi(myCompound['InChI'])\n",
" try:\n",
" myCharge = Chem.GetFormalCharge(myMol)\n",
" if myCharge != 0:\n",
" (myMol, neutralised) = NeutraliseCharges(myMol)\n",
"# if neutralised == False:\n",
"# print neutralised, myCompound['Common-Name']\n",
"# print rdMolDescriptors.CalcMolFormula(neutral_mol)\n",
" c = metob.Compound()\n",
" c.InChI = myCompound['InChI']\n",
" c.formula = rdMolDescriptors.CalcMolFormula(myMol)\n",
" c.MonoIsotopic_molecular_weight = Descriptors.ExactMolWt(myMol)\n",
" c.description = ''\n",
" c.name = myCompound['Common-Name']\n",
" c.store()\n",
" \n",
" except:\n",
" print \"Can not parse \", myCompound['Common-Name']\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"allCompounds = metob.queryDatabase('Compound',name = '%glycine%')\n",
"print len(allCompounds)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"actinonin\n",
"InChI=1S/C19H35N3O5/c1-4-5-6-8-14(11-16(24)21-27)18(25)20-17(13(2)3)19(26)22-10-7-9-15(22)12-23/h13-15,17,23,27H,4-12H2,1-3H3,(H,20,25)(H,21,24)/t14-,15+,17+/m1/s1\n",
"actinomycin D\n",
"InChI=1S/C62H86N12O16/c1-27(2)42-59(84)73-23-17-19-36(73)57(82)69(13)25-38(75)71(15)48(29(5)6)61(86)88-33(11)44(55(80)65-42)67-53(78)35-22-21-31(9)51-46(35)64-47-40(41(63)50(77)32(10)52(47)90-51)54(79)68-45-34(12)89-62(87)49(30(7)8)72(16)39(76)26-70(14)58(83)37-20-18-24-74(37)60(85)43(28(3)4)66-56(45)81/h21-22,27-30,33-34,36-37,42-45,48-49H,17-20,23-26,63H2,1-16H3,(H,65,80)(H,66,81)(H,67,78)(H,68,79)\n",
"actinomycinate\n",
"InChI=1S/C62H106N12O18/c1-27(2)42(59(86)73-23-17-19-36(73)57(84)69(13)25-38(77)71(15)48(29(5)6)61(88)89)65-55(82)44(33(11)75)67-53(80)35-22-21-31(9)51-46(35)64-47-40(41(63)50(79)32(10)52(47)92-51)54(81)68-45(34(12)76)56(83)66-43(28(3)4)60(87)74-24-18-20-37(74)58(85)70(14)26-39(78)72(16)49(30(7)8)62(90)91/h21-22,27-30,33-34,36-39,42-45,48-49,57-62,75-78,84-91H,17-20,23-26,63H2,1-16H3,(H,65,82)(H,66,83)(H,67,80)(H,68,81)/p+3/t33?,34?,36-,37-,38?,39?,42+,43+,44?,45?,48?,49?,57?,58?,59?,60?/m0/s1\n",
"actinomycinic monolactone\n",
"InChI=1S/C62H88N12O17/c1-27(2)42(59(85)73-23-17-19-36(73)57(83)69(13)25-38(76)71(15)48(29(5)6)61(87)88)65-55(81)44(33(11)75)67-54(80)40-41(63)50(78)32(10)52-47(40)64-46-35(22-21-31(9)51(46)91-52)53(79)68-45-34(12)90-62(89)49(30(7)8)72(16)39(77)26-70(14)58(84)37-20-18-24-74(37)60(86)43(28(3)4)66-56(45)82/h21-22,27-30,33-34,36-37,42-45,48-49,75H,17-20,23-26,63H2,1-16H3,(H,65,81)(H,66,82)(H,67,80)(H,68,79)(H,87,88)/p-1/t33?,34?,36-,37-,42+,43+,44-,45?,48-,49?/m0/s1\n",
"actinomycin\n",
"InChI=1S/C62H86N12O16/c1-27(2)42-59(84)73-23-17-19-36(73)57(82)69(13)25-38(75)71(15)48(29(5)6)61(86)88-33(11)44(55(80)65-42)67-53(78)35-22-21-31(9)51-46(35)64-47-40(41(63)50(77)32(10)52(47)90-51)54(79)68-45-34(12)89-62(87)49(30(7)8)72(16)39(76)26-70(14)58(83)37-20-18-24-74(37)60(85)43(28(3)4)66-56(45)81/h21-22,27-30,33-34,36-37,42-45,48-49H,17-20,23-26,63H2,1-16H3,(H,65,80)(H,66,81)(H,67,78)(H,68,79)/t33?,34?,36-,37-,42+,43+,44?,45?,48?,49?/m0/s1\n",
"galactinol\n",
"InChI=1S/C12H22O11/c13-1-2-3(14)4(15)10(21)12(22-2)23-11-8(19)6(17)5(16)7(18)9(11)20/h2-21H,1H2/t2-,3+,4+,5-,6-,7-,8+,9-,10-,11-,12-/m1/s1\n",
"isogalactinol\n",
"InChI=1S/C12H22O11/c13-1-2-3(14)4(15)10(21)12(22-2)23-11-8(19)6(17)5(16)7(18)9(11)20/h2-21H,1H2/t2-,3+,4+,5-,6-,7+,8-,9-,10-,11-,12-/m1/s1\n",
"actinorhodin\n",
"InChI=1S/C32H26O14/c1-9-21-15(3-11(45-9)5-19(35)36)29(41)23-17(33)7-13(27(39)25(23)31(21)43)14-8-18(34)24-26(28(14)40)32(44)22-10(2)46-12(6-20(37)38)4-16(22)30(24)42/h7-12,33-34,39-40H,3-6H2,1-2H3,(H,35,36)(H,37,38)/p-2/t9-,10-,11+,12+/m1/s1\n",
"actinorhodin intermediate\n",
"InChI=1S/C32H26O12/c1-11-23-19(7-13(43-11)9-21(33)34)27(37)17-5-3-15(29(39)25(17)31(23)41)16-4-6-18-26(30(16)40)32(42)24-12(2)44-14(10-22(35)36)8-20(24)28(18)38/h3-6,11-14,39-40H,7-10H2,1-2H3,(H,33,34)(H,35,36)/p-2/t11-,12-,13+,14+/m1/s1\n",
"tuberactinomycin O\n",
"InChI=1S/C25H43N13O9/c26-4-1-2-11(27)6-17(41)33-13-7-31-23(46)18(12-3-5-30-24(28)37-12)38-20(43)14(8-32-25(29)47)34-21(44)15(9-39)36-22(45)16(10-40)35-19(13)42/h8,11-13,15-16,18,39-40H,1-7,9-10,26-27H2,(H,31,46)(H,33,41)(H,34,44)(H,35,42)(H,36,45)(H,38,43)(H3,28,30,37)(H3,29,32,47)/p+3/b14-8-/t11-,12+,13-,15-,16-,18-/m0/s1\n",
"tuberactinomycin A\n",
"InChI=1S/C25H43N13O11/c26-2-1-15(41)9(27)3-16(42)32-11-5-30-23(48)18(10-4-17(43)37-24(28)36-10)38-20(45)12(6-31-25(29)49)33-21(46)13(7-39)35-22(47)14(8-40)34-19(11)44/h6,9-11,13-15,17-18,39-41,43H,1-5,7-8,26-27H2,(H,30,48)(H,32,42)(H,33,46)(H,34,44)(H,35,47)(H,38,45)(H3,28,36,37)(H3,29,31,49)/p+3/b12-6-/t9-,10-,11+,13+,14+,15-,17+,18+/m1/s1\n"
]
}
],
"source": [
"for c in allCompounds:\n",
" print c.name\n",
" print c.InChI"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment