Created
October 5, 2015 23:24
-
-
Save benbowen/81ad833b54586cc50ee5 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# http://metacyc.org/META/class-instances?object=Compounds\n", | |
"# Smart Table\n", | |
"# Add the following columns\n", | |
"# export with weblinks" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import sys,os\n", | |
"sys.path.insert(0,'/global/project/projectdirs/metatlas/anaconda/lib/python2.7/site-packages' )\n", | |
"from metatlas import metatlas_objects as metob\n", | |
"import csv\n", | |
"\n", | |
"\n", | |
"# Append the OpenMSI library directories to LD_LIBRARY_PATH while keeping entries that are\n", | |
"# already set. Empty components are dropped: the previous value started with ':' and the\n", | |
"# dynamic linker treats an empty entry as the current working directory.\n", | |
"# NOTE(review): the loader normally reads LD_LIBRARY_PATH at process start, so this may only\n", | |
"# take effect for subprocesses - confirm that the rdkit import below really depends on it.\n", | |
"curr_ld_lib_path = os.environ.get('LD_LIBRARY_PATH', '')\n", | |
"ld_lib_dirs = [entry for entry in curr_ld_lib_path.split(':') if entry]\n", | |
"for lib_dir in ('/project/projectdirs/openmsi/jupyterhub_libs/boost_1_55_0/lib',\n", | |
"                '/project/projectdirs/openmsi/jupyterhub_libs/lib'):\n", | |
"    # Skip directories that are already listed so re-running the cell does not grow the value.\n", | |
"    if lib_dir not in ld_lib_dirs:\n", | |
"        ld_lib_dirs.append(lib_dir)\n", | |
"os.environ['LD_LIBRARY_PATH'] = ':'.join(ld_lib_dirs)\n", | |
"import sys\n", | |
"# sys.path.remove('/anaconda/lib/python2.7/site-packages')\n", | |
"sys.path.append('/global/project/projectdirs/openmsi/jupyterhub_libs/anaconda/lib/python2.7/site-packages')\n", | |
"sys.path.insert(0,'/project/projectdirs/openmsi/projects/meta-iq/pactolus/pactolus' )\n", | |
"\n", | |
"from rdkit import Chem\n", | |
"# from rdkit.Chem.rdMolDescriptors import ExactMolWt\n", | |
"from rdkit.Chem import Descriptors\n", | |
"from rdkit.Chem import rdMolDescriptors\n", | |
"from rdkit.Chem import AllChem\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"['KEGG', 'Compounds', 'Monoisotopic-Molecular-Weight', 'Wikipedia', 'PubChem', 'Names', 'Common-Name', 'InChI', 'Object ID', 'LIPID MAPS']\n" | |
] | |
} | |
], | |
"source": [ | |
"\"\"\"\n", | |
"Create a list of dictionaries. Each item in the list will be a dict containing the MetaCyc information for one row of the export.\n", | |
"\"\"\"\n", | |
"datafile = 'Selected_rows_from_All_instances_of_Compounds_in_MetaCyc.txt'\n", | |
"# The file is read with the csv 'excel-tab' dialect; DictReader keys each row by the header line.\n", | |
"with open(datafile, 'rUb') as table_file:\n", | |
"    table_reader = csv.DictReader(table_file, dialect='excel-tab')\n", | |
"    metacyc_compounds = list(table_reader)\n", | |
"# Print the column names of the first row as a quick check of what was loaded.\n", | |
"print metacyc_compounds[0].keys()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"\"\"\" contribution from Hans de Winter \"\"\"\n", | |
"def _InitialiseNeutralisationReactions():\n", | |
"    \"\"\"Build the (charged pattern, neutral replacement) pairs used by NeutraliseCharges.\n", | |
"\n", | |
"    Returns a list of tuples. The first element of each tuple is the query molecule\n", | |
"    compiled from the SMARTS pattern; the second is the replacement molecule parsed\n", | |
"    from SMILES with the second argument of MolFromSmiles set to False.\n", | |
"    \"\"\"\n", | |
"    patts= (\n", | |
"        # Imidazoles\n", | |
"        ('[n+;H]','n'),\n", | |
"        # Amines\n", | |
"        ('[N+;!H0]','N'),\n", | |
"        # Carboxylic acids and alcohols\n", | |
"        ('[$([O-]);!$([O-][#7])]','O'),\n", | |
"        # Thiols\n", | |
"        ('[S-;X1]','S'),\n", | |
"        # Sulfonamides\n", | |
"        ('[$([N-;X2]S(=O)=O)]','N'),\n", | |
"        # Enamines\n", | |
"        ('[$([N-;X2][C,N]=C)]','N'),\n", | |
"        # Tetrazoles\n", | |
"        ('[n-]','[nH]'),\n", | |
"        # Sulfoxides\n", | |
"        ('[$([S-]=O)]','S'),\n", | |
"        # Amides\n", | |
"        ('[$([N-]C=O)]','N'),\n", | |
"        )\n", | |
"    return [(Chem.MolFromSmarts(x),Chem.MolFromSmiles(y,False)) for x,y in patts]\n", | |
"\n", | |
"# Cache for the default reaction list; filled on the first NeutraliseCharges call.\n", | |
"_reactions=None\n", | |
"def NeutraliseCharges(mol, reactions=None):\n", | |
"    \"\"\"Replace charged groups in mol with their neutral counterparts.\n", | |
"\n", | |
"    mol       -- RDKit molecule to neutralise.\n", | |
"    reactions -- optional list of (pattern, replacement) molecule pairs. When None, the\n", | |
"                 default list is built once and cached in the module-level _reactions.\n", | |
"\n", | |
"    Returns a tuple (molecule, replaced). replaced is True when at least one pattern\n", | |
"    matched; otherwise the input molecule is returned unchanged together with False.\n", | |
"\n", | |
"    Raises ValueError when an intermediate molecule cannot be parsed back from SMILES.\n", | |
"    \"\"\"\n", | |
"    global _reactions\n", | |
"    if reactions is None:\n", | |
"        if _reactions is None:\n", | |
"            _reactions=_InitialiseNeutralisationReactions()\n", | |
"        reactions=_reactions\n", | |
"    replaced = False\n", | |
"    for reactant, product in reactions:\n", | |
"        while mol.HasSubstructMatch(reactant):\n", | |
"            replaced = True\n", | |
"            rms = AllChem.ReplaceSubstructs(mol, reactant, product)\n", | |
"            # Only the first replacement result is kept; it is written to SMILES and parsed\n", | |
"            # again before the pattern is re-tested.\n", | |
"            rms_smiles = Chem.MolToSmiles(rms[0])\n", | |
"            reparsed_mol = Chem.MolFromSmiles(rms_smiles)\n", | |
"            if reparsed_mol is None:\n", | |
"                # MolFromSmiles returns None on failure; raise a clear error instead of the\n", | |
"                # AttributeError the next HasSubstructMatch call would produce.\n", | |
"                raise ValueError('Could not re-parse neutralised SMILES: %s' % rms_smiles)\n", | |
"            mol = reparsed_mol\n", | |
"    return (mol, replaced)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Build and store one metatlas Compound per row of metacyc_compounds.\n", | |
"for myCompound in metacyc_compounds:\n", | |
"    myMol = Chem.MolFromInchi(myCompound['InChI'])\n", | |
"    if myMol is None:\n", | |
"        # MolFromInchi returns None for an InChI it cannot read; report it directly\n", | |
"        # instead of relying on a later call to raise.\n", | |
"        print \"Can not parse \", myCompound['Common-Name']\n", | |
"        continue\n", | |
"    try:\n", | |
"        myCharge = Chem.GetFormalCharge(myMol)\n", | |
"        if myCharge != 0:\n", | |
"            # Formula and mass below are computed on the neutralised molecule.\n", | |
"            (myMol, neutralised) = NeutraliseCharges(myMol)\n", | |
"        myFormula = rdMolDescriptors.CalcMolFormula(myMol)\n", | |
"        myMass = Descriptors.ExactMolWt(myMol)\n", | |
"    except Exception:\n", | |
"        # Exception rather than a bare except, so KeyboardInterrupt can still stop the loop.\n", | |
"        print \"Can not parse \", myCompound['Common-Name']\n", | |
"        continue\n", | |
"    try:\n", | |
"        c = metob.Compound()\n", | |
"        # The InChI is stored exactly as it appears in the input row.\n", | |
"        c.InChI = myCompound['InChI']\n", | |
"        c.formula = myFormula\n", | |
"        c.MonoIsotopic_molecular_weight = myMass\n", | |
"        c.description = ''\n", | |
"        c.name = myCompound['Common-Name']\n", | |
"        c.store()\n", | |
"    except Exception as err:\n", | |
"        # Failures while building or storing the object are reported separately from\n", | |
"        # parse failures, and the loop carries on with the next row.\n", | |
"        print \"Can not store \", myCompound['Common-Name'], err" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Query the stored Compound objects by name and print how many came back.\n", | |
"# NOTE(review): the '%' characters look like SQL LIKE wildcards - confirm against metob.queryDatabase.\n", | |
"allCompounds = metob.queryDatabase('Compound', name='%glycine%')\n", | |
"print len(allCompounds)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"actinonin\n", | |
"InChI=1S/C19H35N3O5/c1-4-5-6-8-14(11-16(24)21-27)18(25)20-17(13(2)3)19(26)22-10-7-9-15(22)12-23/h13-15,17,23,27H,4-12H2,1-3H3,(H,20,25)(H,21,24)/t14-,15+,17+/m1/s1\n", | |
"actinomycin D\n", | |
"InChI=1S/C62H86N12O16/c1-27(2)42-59(84)73-23-17-19-36(73)57(82)69(13)25-38(75)71(15)48(29(5)6)61(86)88-33(11)44(55(80)65-42)67-53(78)35-22-21-31(9)51-46(35)64-47-40(41(63)50(77)32(10)52(47)90-51)54(79)68-45-34(12)89-62(87)49(30(7)8)72(16)39(76)26-70(14)58(83)37-20-18-24-74(37)60(85)43(28(3)4)66-56(45)81/h21-22,27-30,33-34,36-37,42-45,48-49H,17-20,23-26,63H2,1-16H3,(H,65,80)(H,66,81)(H,67,78)(H,68,79)\n", | |
"actinomycinate\n", | |
"InChI=1S/C62H106N12O18/c1-27(2)42(59(86)73-23-17-19-36(73)57(84)69(13)25-38(77)71(15)48(29(5)6)61(88)89)65-55(82)44(33(11)75)67-53(80)35-22-21-31(9)51-46(35)64-47-40(41(63)50(79)32(10)52(47)92-51)54(81)68-45(34(12)76)56(83)66-43(28(3)4)60(87)74-24-18-20-37(74)58(85)70(14)26-39(78)72(16)49(30(7)8)62(90)91/h21-22,27-30,33-34,36-39,42-45,48-49,57-62,75-78,84-91H,17-20,23-26,63H2,1-16H3,(H,65,82)(H,66,83)(H,67,80)(H,68,81)/p+3/t33?,34?,36-,37-,38?,39?,42+,43+,44?,45?,48?,49?,57?,58?,59?,60?/m0/s1\n", | |
"actinomycinic monolactone\n", | |
"InChI=1S/C62H88N12O17/c1-27(2)42(59(85)73-23-17-19-36(73)57(83)69(13)25-38(76)71(15)48(29(5)6)61(87)88)65-55(81)44(33(11)75)67-54(80)40-41(63)50(78)32(10)52-47(40)64-46-35(22-21-31(9)51(46)91-52)53(79)68-45-34(12)90-62(89)49(30(7)8)72(16)39(77)26-70(14)58(84)37-20-18-24-74(37)60(86)43(28(3)4)66-56(45)82/h21-22,27-30,33-34,36-37,42-45,48-49,75H,17-20,23-26,63H2,1-16H3,(H,65,81)(H,66,82)(H,67,80)(H,68,79)(H,87,88)/p-1/t33?,34?,36-,37-,42+,43+,44-,45?,48-,49?/m0/s1\n", | |
"actinomycin\n", | |
"InChI=1S/C62H86N12O16/c1-27(2)42-59(84)73-23-17-19-36(73)57(82)69(13)25-38(75)71(15)48(29(5)6)61(86)88-33(11)44(55(80)65-42)67-53(78)35-22-21-31(9)51-46(35)64-47-40(41(63)50(77)32(10)52(47)90-51)54(79)68-45-34(12)89-62(87)49(30(7)8)72(16)39(76)26-70(14)58(83)37-20-18-24-74(37)60(85)43(28(3)4)66-56(45)81/h21-22,27-30,33-34,36-37,42-45,48-49H,17-20,23-26,63H2,1-16H3,(H,65,80)(H,66,81)(H,67,78)(H,68,79)/t33?,34?,36-,37-,42+,43+,44?,45?,48?,49?/m0/s1\n", | |
"galactinol\n", | |
"InChI=1S/C12H22O11/c13-1-2-3(14)4(15)10(21)12(22-2)23-11-8(19)6(17)5(16)7(18)9(11)20/h2-21H,1H2/t2-,3+,4+,5-,6-,7-,8+,9-,10-,11-,12-/m1/s1\n", | |
"isogalactinol\n", | |
"InChI=1S/C12H22O11/c13-1-2-3(14)4(15)10(21)12(22-2)23-11-8(19)6(17)5(16)7(18)9(11)20/h2-21H,1H2/t2-,3+,4+,5-,6-,7+,8-,9-,10-,11-,12-/m1/s1\n", | |
"actinorhodin\n", | |
"InChI=1S/C32H26O14/c1-9-21-15(3-11(45-9)5-19(35)36)29(41)23-17(33)7-13(27(39)25(23)31(21)43)14-8-18(34)24-26(28(14)40)32(44)22-10(2)46-12(6-20(37)38)4-16(22)30(24)42/h7-12,33-34,39-40H,3-6H2,1-2H3,(H,35,36)(H,37,38)/p-2/t9-,10-,11+,12+/m1/s1\n", | |
"actinorhodin intermediate\n", | |
"InChI=1S/C32H26O12/c1-11-23-19(7-13(43-11)9-21(33)34)27(37)17-5-3-15(29(39)25(17)31(23)41)16-4-6-18-26(30(16)40)32(42)24-12(2)44-14(10-22(35)36)8-20(24)28(18)38/h3-6,11-14,39-40H,7-10H2,1-2H3,(H,33,34)(H,35,36)/p-2/t11-,12-,13+,14+/m1/s1\n", | |
"tuberactinomycin O\n", | |
"InChI=1S/C25H43N13O9/c26-4-1-2-11(27)6-17(41)33-13-7-31-23(46)18(12-3-5-30-24(28)37-12)38-20(43)14(8-32-25(29)47)34-21(44)15(9-39)36-22(45)16(10-40)35-19(13)42/h8,11-13,15-16,18,39-40H,1-7,9-10,26-27H2,(H,31,46)(H,33,41)(H,34,44)(H,35,42)(H,36,45)(H,38,43)(H3,28,30,37)(H3,29,32,47)/p+3/b14-8-/t11-,12+,13-,15-,16-,18-/m0/s1\n", | |
"tuberactinomycin A\n", | |
"InChI=1S/C25H43N13O11/c26-2-1-15(41)9(27)3-16(42)32-11-5-30-23(48)18(10-4-17(43)37-24(28)36-10)38-20(45)12(6-31-25(29)49)33-21(46)13(7-39)35-22(47)14(8-40)34-19(11)44/h6,9-11,13-15,17-18,39-41,43H,1-5,7-8,26-27H2,(H,30,48)(H,32,42)(H,33,46)(H,34,44)(H,35,47)(H,38,45)(H3,28,36,37)(H3,29,31,49)/p+3/b12-6-/t9-,10-,11+,13+,14+,15-,17+,18+/m1/s1\n" | |
] | |
} | |
], | |
"source": [ | |
"# Print the name and InChI of every compound in allCompounds, one value per line.\n", | |
"for compound in allCompounds:\n", | |
"    print compound.name\n", | |
"    print compound.InChI" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.10" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment